statsample 1.5.0 → 2.0.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. checksums.yaml +4 -4
  2. data/.build.sh +15 -0
  3. data/.gitignore +1 -0
  4. data/.travis.yml +19 -7
  5. data/CONTRIBUTING.md +33 -0
  6. data/History.txt +5 -0
  7. data/README.md +41 -53
  8. data/benchmarks/correlation_matrix_15_variables.rb +6 -5
  9. data/benchmarks/correlation_matrix_5_variables.rb +6 -5
  10. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +23 -26
  11. data/examples/boxplot.rb +17 -5
  12. data/examples/correlation_matrix.rb +36 -7
  13. data/examples/dataset.rb +25 -5
  14. data/examples/dominance_analysis.rb +8 -7
  15. data/examples/dominance_analysis_bootstrap.rb +16 -11
  16. data/examples/histogram.rb +16 -2
  17. data/examples/icc.rb +5 -6
  18. data/examples/levene.rb +17 -3
  19. data/examples/multiple_regression.rb +6 -3
  20. data/examples/parallel_analysis.rb +11 -6
  21. data/examples/polychoric.rb +26 -13
  22. data/examples/principal_axis.rb +8 -4
  23. data/examples/reliability.rb +10 -10
  24. data/examples/scatterplot.rb +8 -0
  25. data/examples/t_test.rb +7 -0
  26. data/examples/u_test.rb +10 -2
  27. data/examples/vector.rb +9 -6
  28. data/examples/velicer_map_test.rb +12 -8
  29. data/lib/statsample.rb +13 -47
  30. data/lib/statsample/analysis/suite.rb +1 -1
  31. data/lib/statsample/anova/oneway.rb +6 -6
  32. data/lib/statsample/anova/twoway.rb +26 -24
  33. data/lib/statsample/bivariate.rb +78 -61
  34. data/lib/statsample/bivariate/pearson.rb +2 -2
  35. data/lib/statsample/codification.rb +45 -32
  36. data/lib/statsample/converter/csv.rb +15 -53
  37. data/lib/statsample/converter/spss.rb +6 -5
  38. data/lib/statsample/converters.rb +50 -211
  39. data/lib/statsample/crosstab.rb +26 -25
  40. data/lib/statsample/daru.rb +117 -0
  41. data/lib/statsample/dataset.rb +70 -942
  42. data/lib/statsample/dominanceanalysis.rb +16 -17
  43. data/lib/statsample/dominanceanalysis/bootstrap.rb +26 -28
  44. data/lib/statsample/factor/parallelanalysis.rb +17 -19
  45. data/lib/statsample/factor/pca.rb +21 -20
  46. data/lib/statsample/factor/principalaxis.rb +3 -3
  47. data/lib/statsample/graph/boxplot.rb +8 -16
  48. data/lib/statsample/graph/histogram.rb +4 -4
  49. data/lib/statsample/graph/scatterplot.rb +8 -7
  50. data/lib/statsample/histogram.rb +128 -119
  51. data/lib/statsample/matrix.rb +20 -16
  52. data/lib/statsample/multiset.rb +39 -38
  53. data/lib/statsample/regression.rb +3 -3
  54. data/lib/statsample/regression/multiple.rb +8 -10
  55. data/lib/statsample/regression/multiple/alglibengine.rb +96 -89
  56. data/lib/statsample/regression/multiple/baseengine.rb +32 -32
  57. data/lib/statsample/regression/multiple/gslengine.rb +33 -36
  58. data/lib/statsample/regression/multiple/matrixengine.rb +7 -9
  59. data/lib/statsample/regression/multiple/rubyengine.rb +39 -41
  60. data/lib/statsample/reliability.rb +23 -25
  61. data/lib/statsample/reliability/icc.rb +8 -7
  62. data/lib/statsample/reliability/multiscaleanalysis.rb +14 -12
  63. data/lib/statsample/reliability/scaleanalysis.rb +58 -60
  64. data/lib/statsample/reliability/skillscaleanalysis.rb +34 -29
  65. data/lib/statsample/resample.rb +1 -1
  66. data/lib/statsample/shorthand.rb +29 -25
  67. data/lib/statsample/test/kolmogorovsmirnov.rb +5 -3
  68. data/lib/statsample/test/levene.rb +28 -27
  69. data/lib/statsample/test/t.rb +7 -9
  70. data/lib/statsample/test/umannwhitney.rb +28 -28
  71. data/lib/statsample/test/wilcoxonsignedrank.rb +45 -43
  72. data/lib/statsample/vector.rb +70 -1013
  73. data/lib/statsample/version.rb +1 -1
  74. data/statsample.gemspec +12 -16
  75. data/test/helpers_tests.rb +1 -1
  76. data/test/test_analysis.rb +17 -17
  77. data/test/test_anova_contrast.rb +6 -6
  78. data/test/test_anovatwowaywithdataset.rb +8 -8
  79. data/test/test_anovawithvectors.rb +8 -8
  80. data/test/test_awesome_print_bug.rb +1 -1
  81. data/test/test_bartlettsphericity.rb +4 -4
  82. data/test/test_bivariate.rb +48 -43
  83. data/test/test_codification.rb +33 -33
  84. data/test/test_crosstab.rb +9 -9
  85. data/test/test_dataset.rb +28 -458
  86. data/test/test_factor.rb +46 -38
  87. data/test/test_factor_pa.rb +22 -13
  88. data/test/test_ggobi.rb +4 -4
  89. data/test/test_gsl.rb +4 -4
  90. data/test/test_histogram.rb +3 -3
  91. data/test/test_matrix.rb +13 -13
  92. data/test/test_multiset.rb +103 -91
  93. data/test/test_regression.rb +57 -52
  94. data/test/test_reliability.rb +55 -45
  95. data/test/test_reliability_icc.rb +8 -8
  96. data/test/test_reliability_skillscale.rb +26 -24
  97. data/test/test_resample.rb +1 -1
  98. data/test/test_statistics.rb +3 -13
  99. data/test/test_stest.rb +9 -9
  100. data/test/test_stratified.rb +3 -3
  101. data/test/test_test_t.rb +12 -12
  102. data/test/test_umannwhitney.rb +2 -2
  103. data/test/test_vector.rb +76 -613
  104. data/test/test_wilcoxonsignedrank.rb +4 -4
  105. metadata +57 -28
  106. data/lib/statsample/rserve_extension.rb +0 -20
  107. data/lib/statsample/vector/gsl.rb +0 -106
  108. data/test/fixtures/repeated_fields.csv +0 -7
  109. data/test/fixtures/scientific_notation.csv +0 -4
  110. data/test/fixtures/test_csv.csv +0 -7
  111. data/test/fixtures/test_xls.xls +0 -0
  112. data/test/test_csv.rb +0 -63
  113. data/test/test_rserve_extension.rb +0 -42
  114. data/test/test_xls.rb +0 -52
@@ -5,30 +5,32 @@ class StatsampleReliabilitySkillScaleTestCase < Minitest::Test
5
5
  setup do
6
6
  options = %w(a b c d e)
7
7
  cases = 20
8
- @id = cases.times.map { |v| v }.to_numeric
9
- @a = cases.times.map { options[rand(5)] }.to_vector
10
- @b = cases.times.map { options[rand(5)] }.to_vector
11
- @c = cases.times.map { options[rand(5)] }.to_vector
12
- @d = cases.times.map { options[rand(5)] }.to_vector
13
- @e = cases.times.map {|i|
14
- i == 0 ? options[rand(0)] :
8
+ @id = Daru::Vector.new(cases.times.map { |v| v })
9
+ @a = Daru::Vector.new(cases.times.map { options[rand(5)] })
10
+ @b = Daru::Vector.new(cases.times.map { options[rand(5)] })
11
+ @c = Daru::Vector.new(cases.times.map { options[rand(5)] })
12
+ @d = Daru::Vector.new(cases.times.map { options[rand(5)] })
13
+ @e = Daru::Vector.new(
14
+ cases.times.map do |i|
15
+ i == 0 ? options[rand(0)] :
15
16
  rand > 0.8 ? nil : options[rand(5)]
16
- }.to_vector
17
- @ds = { 'id' => @id, 'a' => @a, 'b' => @b, 'c' => @c, 'd' => @d, 'e' => @e }.to_dataset
18
- @key = { 'a' => 'a', 'b' => options[rand(5)], 'c' => options[rand(5)], 'd' => options[rand(5)], 'e' => options[rand(5)] }
17
+ end
18
+ )
19
+ @ds = Daru::DataFrame.new({ :id => @id, :a => @a, :b => @b, :c => @c, :d => @d, :e => @e })
20
+ @key = { :a => 'a', :b => options[rand(5)], :c => options[rand(5)], :d => options[rand(5)], :e => options[rand(5)] }
19
21
  @ssa = Statsample::Reliability::SkillScaleAnalysis.new(@ds, @key)
20
- @ac = @a.map { |v| v == @key['a'] ? 1 : 0 }.to_numeric
21
- @bc = @b.map { |v| v == @key['b'] ? 1 : 0 }.to_numeric
22
- @cc = @c.map { |v| v == @key['c'] ? 1 : 0 }.to_numeric
23
- @dc = @d.map { |v| v == @key['d'] ? 1 : 0 }.to_numeric
24
- @ec = @e.map { |v| v.nil? ? nil : (v == @key['e'] ? 1 : 0) }.to_numeric
22
+ @ac = Daru::Vector.new(@a.map { |v| v == @key[:a] ? 1 : 0 })
23
+ @bc = Daru::Vector.new(@b.map { |v| v == @key[:b] ? 1 : 0 })
24
+ @cc = Daru::Vector.new(@c.map { |v| v == @key[:c] ? 1 : 0 })
25
+ @dc = Daru::Vector.new(@d.map { |v| v == @key[:d] ? 1 : 0 })
26
+ @ec = Daru::Vector.new(@e.map { |v| v.nil? ? nil : (v == @key[:e] ? 1 : 0) })
25
27
  end
26
28
  should 'return proper corrected dataset' do
27
- cds = { 'id' => @id, 'a' => @ac, 'b' => @bc, 'c' => @cc, 'd' => @dc, 'e' => @ec }.to_dataset
29
+ cds = Daru::DataFrame.new({ :id => @id, :a => @ac, :b => @bc, :c => @cc, :d => @dc, :e => @ec })
28
30
  assert_equal(cds, @ssa.corrected_dataset)
29
31
  end
30
32
  should 'return proper corrected minimal dataset' do
31
- cdsm = { 'a' => @ac, 'b' => @bc, 'c' => @cc, 'd' => @dc, 'e' => @ec }.to_dataset
33
+ cdsm = Daru::DataFrame.new({ :a => @ac, :b => @bc, :c => @cc, :d => @dc, :e => @ec })
32
34
  assert_equal(cdsm, @ssa.corrected_dataset_minimal)
33
35
  end
34
36
  should 'return correct vector_sum and vector_sum' do
@@ -37,13 +39,13 @@ class StatsampleReliabilitySkillScaleTestCase < Minitest::Test
37
39
  assert_equal(cdsm.vector_mean, @ssa.vector_mean)
38
40
  end
39
41
  should 'not crash on rare case' do
40
- a = Statsample::Vector['c', 'c', 'a', 'a', 'c', 'a', 'b', 'c', 'c', 'b', 'a', 'd', 'a', 'd', 'a', 'a', 'd', 'e', 'c', 'd']
41
- b = Statsample::Vector['e', 'b', 'e', 'b', 'c', 'd', 'a', 'e', 'e', 'c', 'b', 'e', 'e', 'b', 'd', 'c', 'e', 'b', 'b', 'd']
42
- c = Statsample::Vector['e', 'b', 'e', 'c', 'e', 'c', 'b', 'd', 'e', 'c', 'a', 'a', 'b', 'd', 'e', 'c', 'b', 'a', 'a', 'e']
43
- d = Statsample::Vector['a', 'b', 'd', 'd', 'e', 'b', 'e', 'b', 'd', 'c', 'e', 'a', 'c', 'd', 'c', 'c', 'e', 'd', 'd', 'b']
44
- e = Statsample::Vector['a', 'b', nil, 'd', 'c', 'c', 'd', nil, 'd', 'd', 'e', 'e', nil, nil, nil, 'd', 'c', nil, 'e', 'd']
45
- key = { 'a' => 'a', 'b' => 'e', 'c' => 'd', 'd' => 'c', 'e' => 'd' }
46
- ds = Statsample::Dataset.new('a' => a, 'b' => b, 'c' => c, 'd' => d, 'e' => e)
42
+ a = Daru::Vector.new(['c', 'c', 'a', 'a', 'c', 'a', 'b', 'c', 'c', 'b', 'a', 'd', 'a', 'd', 'a', 'a', 'd', 'e', 'c', 'd'])
43
+ b = Daru::Vector.new(['e', 'b', 'e', 'b', 'c', 'd', 'a', 'e', 'e', 'c', 'b', 'e', 'e', 'b', 'd', 'c', 'e', 'b', 'b', 'd'])
44
+ c = Daru::Vector.new(['e', 'b', 'e', 'c', 'e', 'c', 'b', 'd', 'e', 'c', 'a', 'a', 'b', 'd', 'e', 'c', 'b', 'a', 'a', 'e'])
45
+ d = Daru::Vector.new(['a', 'b', 'd', 'd', 'e', 'b', 'e', 'b', 'd', 'c', 'e', 'a', 'c', 'd', 'c', 'c', 'e', 'd', 'd', 'b'])
46
+ e = Daru::Vector.new(['a', 'b', nil, 'd', 'c', 'c', 'd', nil, 'd', 'd', 'e', 'e', nil, nil, nil, 'd', 'c', nil, 'e', 'd'])
47
+ key = { :a => 'a', :b => 'e', :c => 'd', :d => 'c', :e => 'd' }
48
+ ds = Daru::DataFrame.new({:a => a, :b => b, :c => c, :d => d, :e => e})
47
49
  ssa = Statsample::Reliability::SkillScaleAnalysis.new(ds, key)
48
50
  assert(ssa.summary)
49
51
  end
@@ -17,7 +17,7 @@ class StatsampleResampleTestCase < Minitest::Test
17
17
  Statsample::Resample.generate(20, 1, 10).count(1)
18
18
  }
19
19
  assert_equal(400, r.size)
20
- v = Statsample::Vector.new(r, :numeric)
20
+ v = Daru::Vector.new(r)
21
21
  a = v.count { |x| x > 3 }
22
22
  assert(a >= 30 && a <= 70)
23
23
  end
@@ -32,7 +32,7 @@ class StatsampleStatisicsTestCase < Minitest::Test
32
32
  end
33
33
 
34
34
  def test_estimation_mean
35
- v = ([42] * 23 + [41] * 4 + [36] * 1 + [32] * 1 + [29] * 1 + [27] * 2 + [23] * 1 + [19] * 1 + [16] * 2 + [15] * 2 + [14, 11, 10, 9, 7] + [6] * 3 + [5] * 2 + [4, 3]).to_vector(:numeric)
35
+ v = Daru::Vector.new([42] * 23 + [41] * 4 + [36] * 1 + [32] * 1 + [29] * 1 + [27] * 2 + [23] * 1 + [19] * 1 + [16] * 2 + [15] * 2 + [14, 11, 10, 9, 7] + [6] * 3 + [5] * 2 + [4, 3])
36
36
  assert_equal(50, v.size)
37
37
  assert_equal(1471, v.sum)
38
38
  # limits=Statsample::SRS.mean_confidence_interval_z(v.mean(), v.sds(), v.size,676,0.80)
@@ -55,19 +55,9 @@ class StatsampleStatisicsTestCase < Minitest::Test
55
55
  assert_in_delta(0.46, l[1], 0.01)
56
56
  end
57
57
 
58
- def test_ml
59
- if true
60
- # real=[1,1,1,1].to_vector(:numeric)
61
-
62
- # pred=[0.0001,0.0001,0.0001,0.0001].to_vector(:numeric)
63
- # puts Statsample::Bivariate.maximum_likehood_dichotomic(pred,real)
64
-
65
- end
66
- end
67
-
68
58
  def test_simple_linear_regression
69
- a = [1, 2, 3, 4, 5, 6].to_vector(:numeric)
70
- b = [6, 2, 4, 10, 12, 8].to_vector(:numeric)
59
+ a = Daru::Vector.new([1, 2, 3, 4, 5, 6])
60
+ b = Daru::Vector.new([6, 2, 4, 10, 12, 8])
71
61
  reg = Statsample::Regression::Simple.new_from_vectors(a, b)
72
62
  assert_in_delta((reg.ssr + reg.sse).to_f, reg.sst, 0.001)
73
63
  assert_in_delta(Statsample::Bivariate.pearson(a, b), reg.r, 0.001)
@@ -24,26 +24,26 @@ class StatsampleTestTestCase < Minitest::Test
24
24
  end
25
25
 
26
26
  def test_u_mannwhitney
27
- a = [1, 2, 3, 4, 5, 6].to_numeric
28
- b = [0, 5, 7, 9, 10, 11].to_numeric
27
+ a = Daru::Vector.new([1, 2, 3, 4, 5, 6])
28
+ b = Daru::Vector.new([0, 5, 7, 9, 10, 11])
29
29
  assert_equal(7.5, Statsample::Test.u_mannwhitney(a, b).u)
30
30
  assert_equal(7.5, Statsample::Test.u_mannwhitney(b, a).u)
31
- a = [1, 7, 8, 9, 10, 11].to_numeric
32
- b = [2, 3, 4, 5, 6, 12].to_numeric
31
+ a = Daru::Vector.new([1, 7, 8, 9, 10, 11])
32
+ b = Daru::Vector.new([2, 3, 4, 5, 6, 12])
33
33
  assert_equal(11, Statsample::Test.u_mannwhitney(a, b).u)
34
34
  end
35
35
 
36
36
  def test_levene
37
- a = [1, 2, 3, 4, 5, 6, 7, 8, 100, 10].to_numeric
38
- b = [30, 40, 50, 60, 70, 80, 90, 100, 110, 120].to_numeric
37
+ a = Daru::Vector.new([1, 2, 3, 4, 5, 6, 7, 8, 100, 10])
38
+ b = Daru::Vector.new([30, 40, 50, 60, 70, 80, 90, 100, 110, 120])
39
39
  levene = Statsample::Test::Levene.new([a, b])
40
40
  assert_levene(levene)
41
41
  end
42
42
 
43
43
  def test_levene_dataset
44
- a = [1, 2, 3, 4, 5, 6, 7, 8, 100, 10].to_numeric
45
- b = [30, 40, 50, 60, 70, 80, 90, 100, 110, 120].to_numeric
46
- ds = { 'a' => a, 'b' => b }.to_dataset
44
+ a = Daru::Vector.new([1, 2, 3, 4, 5, 6, 7, 8, 100, 10])
45
+ b = Daru::Vector.new([30, 40, 50, 60, 70, 80, 90, 100, 110, 120])
46
+ ds = Daru::DataFrame.new({ :a => a, :b => b })
47
47
  levene = Statsample::Test::Levene.new(ds)
48
48
  assert_levene(levene)
49
49
  end
@@ -9,9 +9,9 @@ class StatsampleStratifiedTestCase < Minitest::Test
9
9
  a = [10, 20, 30, 40, 50]
10
10
  b = [110, 120, 130, 140]
11
11
  pop = a + b
12
- av = a.to_vector(:numeric)
13
- bv = b.to_vector(:numeric)
14
- popv = pop.to_vector(:numeric)
12
+ av = Daru::Vector.new(a)
13
+ bv = Daru::Vector.new(b)
14
+ popv = Daru::Vector.new(pop)
15
15
  assert_equal(popv.mean, Statsample::StratifiedSample.mean(av, bv))
16
16
  end
17
17
  end
@@ -4,24 +4,24 @@ class StatsampleTestTTestCase < Minitest::Test
4
4
  include Math
5
5
  context T do
6
6
  setup do
7
- @a = [30.02, 29.99, 30.11, 29.97, 30.01, 29.99].to_numeric
8
- @b = [29.89, 29.93, 29.72, 29.98, 30.02, 29.98].to_numeric
7
+ @a = Daru::Vector.new([30.02, 29.99, 30.11, 29.97, 30.01, 29.99])
8
+ @b = Daru::Vector.new([29.89, 29.93, 29.72, 29.98, 30.02, 29.98])
9
9
  @x1 = @a.mean
10
10
  @x2 = @b.mean
11
11
  @s1 = @a.sd
12
12
  @s2 = @b.sd
13
- @n1 = @a.n
14
- @n2 = @b.n
13
+ @n1 = @a.size
14
+ @n2 = @b.size
15
15
  end
16
16
  should 'calculate correctly standard t' do
17
- t = Statsample::Test::T.new(@x1, @s1.quo(Math.sqrt(@a.n)), @a.n - 1)
18
- assert_equal((@x1).quo(@s1.quo(Math.sqrt(@a.n))), t.t)
19
- assert_equal(@a.n - 1, t.df)
17
+ t = Statsample::Test::T.new(@x1, @s1.quo(Math.sqrt(@a.size)), @a.size - 1)
18
+ assert_equal((@x1).quo(@s1.quo(Math.sqrt(@a.size))), t.t)
19
+ assert_equal(@a.size - 1, t.df)
20
20
  assert(t.summary.size > 0)
21
21
  end
22
22
  should 'calculate correctly t for one sample' do
23
- t1 = [6, 4, 6, 7, 4, 5, 5, 12, 6, 1].to_numeric
24
- t2 = [9, 6, 5, 10, 10, 8, 7, 10, 6, 5].to_numeric
23
+ t1 = Daru::Vector.new([6, 4, 6, 7, 4, 5, 5, 12, 6, 1])
24
+ t2 = Daru::Vector.new([9, 6, 5, 10, 10, 8, 7, 10, 6, 5])
25
25
  d = t1 - t2
26
26
  t = Statsample::Test::T::OneSample.new(d)
27
27
  assert_in_delta(-2.631, t.t, 0.001)
@@ -48,14 +48,14 @@ class StatsampleTestTTestCase < Minitest::Test
48
48
  assert_in_delta(0.09095, t.probability_not_equal_variance, 0.001)
49
49
  end
50
50
  should 'be the same using shorthand' do
51
- v = 100.times.map { rand(100) }.to_numeric
51
+ v = Daru::Vector.new(100.times.map { rand(100) })
52
52
  assert_equal(Statsample::Test.t_one_sample(v).t, T::OneSample.new(v).t)
53
53
  end
54
54
  should 'calculate all values for one sample T test' do
55
55
  u = @a.mean + (1 - rand * 2)
56
56
  tos = T::OneSample.new(@a, u: u)
57
- assert_equal((@a.mean - u).quo(@a.sd.quo(sqrt(@a.n))), tos.t)
58
- assert_equal(@a.n - 1, tos.df)
57
+ assert_equal((@a.mean - u).quo(@a.sd.quo(sqrt(@a.size))), tos.t)
58
+ assert_equal(@a.size - 1, tos.df)
59
59
  assert(tos.summary.size > 0)
60
60
  end
61
61
  end
@@ -4,8 +4,8 @@ class StatsampleUMannWhitneyTestCase < Minitest::Test
4
4
  include Statsample::Test
5
5
  context Statsample::Test::UMannWhitney do
6
6
  setup do
7
- @v1 = [1, 2, 3, 4, 7, 8, 9, 10, 14, 15].to_numeric
8
- @v2 = [5, 6, 11, 12, 13, 16, 17, 18, 19].to_numeric
7
+ @v1 = Daru::Vector.new([1, 2, 3, 4, 7, 8, 9, 10, 14, 15])
8
+ @v2 = Daru::Vector.new([5, 6, 11, 12, 13, 16, 17, 18, 19])
9
9
  @u = Statsample::Test::UMannWhitney.new(@v1, @v2)
10
10
  end
11
11
  should 'have same result using class or Test#u_mannwhitney' do
@@ -3,24 +3,11 @@ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
3
3
  class StatsampleTestVector < Minitest::Test
4
4
  include Statsample::Shorthand
5
5
 
6
- def setup
7
- @c = Statsample::Vector.new([5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99], :object)
8
- @c.name = 'Test Vector'
9
- @c.missing_values = [-99]
10
- end
11
-
12
- def assert_counting_tokens(b)
13
- assert_equal([1, 1, 0, 1, 0, nil], b['a'].to_a)
14
- assert_equal([0, 1, 0, 0, 0, nil], b['b'].to_a)
15
- assert_equal([0, 0, 1, 0, 0, nil], b['c'].to_a)
16
- assert_equal([0, 0, 1, 1, 0, nil], b['d'].to_a)
17
- assert_equal([0, 0, 0, 0, 1, nil], b[10].to_a)
18
- end
19
6
  context Statsample do
20
7
  setup do
21
8
  @sample = 100
22
- @a = @sample.times.map { |i| (i + rand(10)) % 10 == 0 ? nil : rand(100) }.to_numeric
23
- @b = @sample.times.map { |i| (i + rand(10)) % 10 == 0 ? nil : rand(100) }.to_numeric
9
+ @a = Daru::Vector.new(@sample.times.map { |i| (i + rand(10)) % 10 == 0 ? nil : rand(100) })
10
+ @b = Daru::Vector.new(@sample.times.map { |i| (i + rand(10)) % 10 == 0 ? nil : rand(100) })
24
11
  @correct_a = []
25
12
  @correct_b = []
26
13
  @a.each_with_index do |_v, i|
@@ -29,8 +16,8 @@ class StatsampleTestVector < Minitest::Test
29
16
  @correct_b.push(@b[i])
30
17
  end
31
18
  end
32
- @correct_a = @correct_a.to_numeric
33
- @correct_b = @correct_b.to_numeric
19
+ @correct_a = Daru::Vector.new(@correct_a)
20
+ @correct_b = Daru::Vector.new(@correct_b)
34
21
 
35
22
  @common = lambda do |av, bv|
36
23
  assert_equal(@correct_a, av, 'A no es esperado')
@@ -39,649 +26,125 @@ class StatsampleTestVector < Minitest::Test
39
26
  assert(!bv.has_missing_data?, 'b tiene datos faltantes')
40
27
  end
41
28
  end
29
+
42
30
  should 'return correct only_valid' do
43
31
  av, bv = Statsample.only_valid @a, @b
32
+ av.reset_index!
33
+ bv.reset_index!
44
34
  av2, bv2 = Statsample.only_valid av, bv
45
35
  @common.call(av, bv)
46
36
  assert_equal(av, av2)
47
37
  assert_not_same(av, av2)
48
38
  assert_not_same(bv, bv2)
49
39
  end
40
+
50
41
  should 'return correct only_valid_clone' do
51
42
  av, bv = Statsample.only_valid_clone @a, @b
43
+ av.reset_index!
44
+ bv.reset_index!
52
45
  @common.call(av, bv)
53
46
  av2, bv2 = Statsample.only_valid_clone av, bv
54
47
  assert_equal(av, av2)
55
48
  assert_same(av, av2)
56
49
  assert_same(bv, bv2)
57
50
  end
58
- end
59
- context Statsample::Vector do
60
- setup do
61
- @c = Statsample::Vector.new([5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99], :object)
62
- @c.name = 'Test Vector'
63
- @c.missing_values = [-99]
64
- end
65
- should_with_gsl 'be created with GSL::Vector' do
66
- gsl = GSL::Vector[1, 2, 3, 4, 5]
67
- v = Statsample::Vector.new(gsl)
68
- assert_equal([1, 2, 3, 4, 5], v.to_a)
69
- refute(v.flawed?)
70
- end
71
51
 
72
- context 'using matrix operations' do
73
- setup do
74
- @a = [1, 2, 3, 4, 5].to_numeric
75
- end
76
- should 'to_matrix returns a matrix with 1 row' do
77
- mh = Matrix[[1, 2, 3, 4, 5]]
78
- assert_equal(mh, @a.to_matrix)
79
- end
80
- should 'to_matrix(:vertical) returns a matrix with 1 column' do
81
- mv = Matrix.columns([[1, 2, 3, 4, 5]])
82
- assert_equal(mv, @a.to_matrix(:vertical))
83
- end
84
- should 'returns valid submatrixes' do
85
- # 3*4 + 2*5 = 22
86
- a = [3, 2].to_vector(:numeric)
87
- b = [4, 5].to_vector(:numeric)
88
- assert_equal(22, (a.to_matrix * b.to_matrix(:vertical))[0, 0])
89
- end
52
+ should 'returns correct vector_cols_matrix' do
53
+ v1 = Daru::Vector.new(%w(a a a b b b c c))
54
+ v2 = Daru::Vector.new(%w(1 3 4 5 6 4 3 2))
55
+ v3 = Daru::Vector.new(%w(1 0 0 0 1 1 1 0))
56
+ ex = Matrix.rows([%w(a 1 1), %w(a 3 0), %w(a 4 0), %w(b 5 0), %w(b 6 1), %w(b 4 1), %w(c 3 1), %w(c 2 0)])
57
+ assert_equal(ex, Statsample.vector_cols_matrix(v1, v2, v3))
90
58
  end
91
- context 'when initializing' do
92
- setup do
93
- @data = (10.times.map { rand(100) }) + [nil]
94
- @original = Statsample::Vector.new(@data, :numeric)
95
- end
96
- should 'be the sample using []' do
97
- second = Statsample::Vector[*@data]
98
- assert_equal(@original, second)
99
- end
100
- should '[] returns same results as R-c()' do
101
- reference = [0, 4, 5, 6, 10].to_numeric
102
- assert_equal(reference, Statsample::Vector[0, 4, 5, 6, 10])
103
- assert_equal(reference, Statsample::Vector[0, 4..6, 10])
104
- assert_equal(reference, Statsample::Vector[[0], [4, 5, 6], [10]])
105
- assert_equal(reference, Statsample::Vector[[0], [4, [5, [6]]], [10]])
106
-
107
- assert_equal(reference, Statsample::Vector[[0], [4, 5, 6].to_vector, [10]])
108
- end
109
- should 'be the same usign #to_vector' do
110
- lazy1 = @data.to_vector(:numeric)
111
- assert_equal(@original, lazy1)
112
- end
113
- should 'be the same using #to_numeric' do
114
- lazy2 = @data.to_numeric
115
- assert_equal(@original, lazy2)
116
- assert_equal(:numeric, lazy2.type)
117
- assert_equal(@data.find_all { |v| !v.nil? }, lazy2.valid_data)
118
- end
119
- should 'could use new_numeric with size only' do
120
- v1 = 10.times.map { nil }.to_numeric
121
- v2 = Statsample::Vector.new_numeric(10)
122
- assert_equal(v1, v2)
123
- end
124
- should 'could use new_numeric with size and value' do
125
- a = rand
126
- v1 = 10.times.map { a }.to_numeric
127
- v2 = Statsample::Vector.new_numeric(10, a)
128
- assert_equal(v1, v2)
129
- end
130
- should 'could use new_numeric with func' do
131
- v1 = 10.times.map { |i| i * 2 }.to_numeric
132
- v2 = Statsample::Vector.new_numeric(10) { |i| i * 2 }
133
- assert_equal(v1, v2)
134
- end
135
- end
136
-
137
- context "new types :numeric and :object" do
138
- should "set default type of vector to :object" do
139
- v = Statsample::Vector.new [1,2,3,4,5]
140
- assert_equal(:object, v.type)
141
- end
142
-
143
- should "initialize Vector with :numeric type" do
144
- v = Statsample::Vector.new [1,2,3,4,5,nil], :numeric
145
- assert_equal(:numeric, v.type)
146
- assert_equal([1,2,3,4,5], v.valid_data)
147
- end
148
-
149
- should "show a warning when initializing with :nominal, :numeric or :ordinal" do
150
- assert_output(nil,"WARNING: nominal has been deprecated. Use :object instead.\n") do
151
- Statsample::Vector.new [1,2,3,4,5,nil,'hello'], :nominal
152
- end
153
-
154
- assert_output(nil,"WARNING: scale has been deprecated. Use :numeric instead.\n") do
155
- Statsample::Vector.new [1,2,3,4,nil,5], :scale
156
- end
157
-
158
- assert_output(nil,"WARNING: ordinal has been deprecated. Use :numeric instead.\n") do
159
- Statsample::Vector.new [1,2,3,4,5], :ordinal
160
- end
59
+ end
161
60
 
162
- assert_output(nil,"WARNING: .new_scale has been deprecated. Use .new_numeric instead.\n") do
163
- Statsample::Vector.new_scale 10, 1
61
+ context Statsample::Vector do
62
+ context 'when initializing' do
63
+ should '.new creates a Daru::Vector internally and shows a warning' do
64
+ assert_output(nil, "WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\n") do
65
+ data = (10.times.map { rand(100) }) + [nil]
66
+ original = Statsample::Vector.new(@data, :numeric)
67
+ assert_equal(true, original.kind_of?(Daru::Vector))
164
68
  end
165
69
  end
166
70
 
167
- should "test that new shorthands work" do
168
- numeric = Statsample::Vector.new([1,2,3,4,nil,5], :numeric)
169
- assert_equal(numeric, [1,2,3,4,nil,5].to_numeric)
170
- assert_equal(numeric, [1,2,3,4,nil,5].to_vector(:numeric))
171
-
172
- obj = Statsample::Vector.new([1,2,3,4,'one','two'], :object)
173
- assert_equal(obj, [1,2,3,4,'one','two'].to_vector(:object))
174
- end
175
-
176
- should "test that old shorthands raise warnings" do
177
- assert_output(nil,"WARNING: to_scale has been deprecated. Use to_numeric instead.\n") do
178
- [1,2,3,4,nil,5].to_scale
71
+ should '[] returns same results as R-c()' do
72
+ assert_output(nil, "WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\n") do
73
+ assert_equal(true, Statsample::Vector[1,2,3,4,5].kind_of?(Daru::Vector))
179
74
  end
180
75
  end
181
- end
182
76
 
183
- context '#split_by_separator' do
184
- setup do
185
- @a = Statsample::Vector.new(['a', 'a,b', 'c,d', 'a,d', 10, nil], :object)
186
- @b = @a.split_by_separator(',')
187
- end
188
- should 'returns a Hash' do
189
- assert_kind_of(Hash, @b)
190
- end
191
- should 'return a Hash with keys with different values of @a' do
192
- expected = ['a', 'b', 'c', 'd', 10]
193
- assert_equal(expected, @b.keys)
194
- end
77
+ should "new_numeric/new_scale creates a Daru::Vector internally and shows a warning" do
78
+ assert_output(nil, "WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\n") do
79
+ Statsample::Vector.new_scale(4)
80
+ end
195
81
 
196
- should 'returns a Hash, which values are Statsample::Vector' do
197
- @b.each_key { |k| assert_instance_of(Statsample::Vector, @b[k]) }
198
- end
199
- should 'hash values are n times the tokens appears' do
200
- assert_counting_tokens(@b)
201
- end
202
- should '#split_by_separator_freq returns the number of ocurrences of tokens' do
203
- assert_equal({ 'a' => 3, 'b' => 1, 'c' => 1, 'd' => 2, 10 => 1 }, @a.split_by_separator_freq)
204
- end
205
- should 'using a different separator give the same values' do
206
- a = Statsample::Vector.new(['a', 'a*b', 'c*d', 'a*d', 10, nil], :object)
207
- b = a.split_by_separator('*')
208
- assert_counting_tokens(b)
209
- end
210
- end
211
- should 'return correct median_absolute_deviation' do
212
- a = [1, 1, 2, 2, 4, 6, 9].to_numeric
213
- assert_equal(1, a.median_absolute_deviation)
214
- end
215
- should 'return correct histogram' do
216
- a = 10.times.map { |v| v }.to_numeric
217
- hist = a.histogram(2)
218
- assert_equal([5, 5], hist.bin)
219
- 3.times do |i|
220
- assert_in_delta(i * 4.5, hist.get_range(i)[0], 1e-9)
82
+ assert_output(nil, "WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\n") do
83
+ Statsample::Vector.new_numeric(4)
84
+ end
221
85
  end
222
86
  end
223
- should 'have a name' do
224
- @c.name == 'Test Vector'
225
- end
226
- should 'without explicit name, returns vector with succesive numbers' do
227
- a = 10.times.map { rand(100) }.to_numeric
228
- b = 10.times.map { rand(100) }.to_numeric
229
- assert_match(/Vector \d+/, a.name)
230
- a.name =~ /Vector (\d+)/
231
- next_number = Regexp.last_match(1).to_i + 1
232
- assert_equal("Vector #{next_number}", b.name)
233
- end
234
- should 'save to a file and load the same Vector' do
235
- outfile = Tempfile.new('vector.vec')
236
- @c.save(outfile.path)
237
- a = Statsample.load(outfile.path)
238
- assert_equal(@c, a)
239
- end
240
- should '#collect returns an array' do
241
- val = @c.collect { |v| v }
242
- assert_equal(val, [5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99])
243
- end
87
+ end
244
88
 
245
- should '#recode returns a recoded array' do
246
- a = @c.recode { |v| @c.is_valid?(v) ? 0 : 1 }
247
- exp = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1].to_vector
248
- assert_equal(exp, a)
249
- exp.recode! { |v| v == 0 ? 1 : 0 }
250
- exp2 = (([1] * 15) + ([0] * 3)).to_vector
251
- assert_equal(exp2, exp)
252
- end
253
- should '#product returns the * of all values' do
254
- a = [1, 2, 3, 4, 5].to_vector(:numeric)
255
- assert_equal(120, a.product)
89
+ context "new types :numeric and :object" do
90
+ should "numerical data is automatically detected to be of type :numeric" do
91
+ v = Statsample::Vector.new [1,2,3,4,5,nil]
92
+ assert_equal(:numeric, v.type)
256
93
  end
257
94
 
258
- should 'missing values' do
259
- @c.missing_values = [10]
260
- assert_equal([-99, -99, 1, 2, 3, 4, 5, 5, 5, 5, 5, 6, 6, 7, 8, 9], @c.valid_data.sort)
261
- assert_equal([5, 5, 5, 5, 5, 6, 6, 7, 8, 9, nil, 1, 2, 3, 4, nil, -99, -99], @c.data_with_nils)
262
- @c.missing_values = [-99]
263
- assert_equal(@c.valid_data.sort, [1, 2, 3, 4, 5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10])
264
- assert_equal(@c.data_with_nils, [5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, nil, nil])
265
- @c.missing_values = []
266
- assert_equal(@c.valid_data.sort, [-99, -99, 1, 2, 3, 4, 5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10])
267
- assert_equal(@c.data_with_nils, [5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99])
268
- end
269
- should 'correct has_missing_data? with missing data' do
270
- a = [1, 2, 3, nil].to_vector
271
- assert(a.has_missing_data?)
272
- end
273
- should 'correct has_missing_data? without missing data' do
274
- a = [1, 2, 3, 4, 10].to_vector
275
- assert(!a.has_missing_data?)
276
- end
277
- should 'with explicit missing_values, should respond has_missing_data?' do
278
- a = [1, 2, 3, 4, 10].to_vector
279
- a.missing_values = [10]
280
- assert(a.has_missing_data?)
281
- end
282
- should 'label correctly fields' do
283
- @c.labels = { 5 => 'FIVE' }
284
- assert_equal(['FIVE', 'FIVE', 'FIVE', 'FIVE', 'FIVE', 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99], @c.vector_labeled.to_a)
285
- end
286
- should 'verify' do
287
- h = @c.verify { |d| !d.nil? and d > 0 }
288
- e = { 15 => nil, 16 => -99, 17 => -99 }
289
- assert_equal(e, h)
290
- end
291
- should 'have a summary with name on it' do
292
- assert_match(/#{@c.name}/, @c.summary)
95
+ should "object data automatically detected as :object" do
96
+ v = Statsample::Vector.new [1,2,3,4,'hello','world']
97
+ assert_equal(:object, v.type)
293
98
  end
294
99
 
295
- should 'GSL::Vector based should push correcty' do
296
- if Statsample.has_gsl?
297
- v = GSL::Vector[1, 2, 3, 4, 5].to_numeric
298
- v.push(nil)
299
- assert_equal([1, 2, 3, 4, 5, nil], v.to_a)
300
- assert(v.flawed?)
301
- else
302
- skip('Requires GSL')
100
+ should "initialize Vector with :numeric type" do
101
+ v = Statsample::Vector.new [1,2,3,4,5,nil], :numeric
102
+ assert_equal(:numeric, v.type)
103
+ assert_output(nil, "WARNING: valid_data in Statsample::Vector has been deprecated in favor of only_valid in Daru::Vector. Please use that.\n") do
104
+ assert_equal([1,2,3,4,5], v.valid_data)
303
105
  end
304
106
  end
305
107
 
306
- should 'split correctly' do
307
- a = Statsample::Vector.new(['a', 'a,b', 'c,d', 'a,d', 'd', 10, nil], :object)
308
- assert_equal([%w(a), %w(a b), %w(c d), %w(a d), %w(d), [10], nil], a.splitted)
309
- end
310
- should 'multiply correct for scalar' do
311
- a = [1, 2, 3].to_numeric
312
- assert_equal([5, 10, 15].to_numeric, a * 5)
313
- end
314
- should 'multiply correct with other vector' do
315
- a = [1, 2, 3].to_numeric
316
- b = [2, 4, 6].to_numeric
317
-
318
- assert_equal([2, 8, 18].to_numeric, a * b)
319
- end
320
- should 'sum correct for scalar' do
321
- a = [1, 2, 3].to_numeric
322
- assert_equal([11, 12, 13].to_numeric, a + 10)
323
- end
324
-
325
- should 'raise NoMethodError when method requires numeric and vector is object' do
326
- @c.type = :object
327
- assert_raise(::NoMethodError) { @c.median }
328
- end
329
-
330
- should 'jacknife correctly with named method' do
331
- # First example
332
- a = [1, 2, 3, 4].to_numeric
333
- ds = a.jacknife(:mean)
334
- assert_equal(a.mean, ds[:mean].mean)
335
- ds = a.jacknife([:mean, :sd])
336
- assert_equal(a.mean, ds[:mean].mean)
337
- assert_equal(a.sd, ds[:mean].sd)
338
- end
339
- should 'jacknife correctly with custom method' do
340
- # Second example
341
- a = [17.23, 18.71, 13.93, 18.81, 15.78, 11.29, 14.91, 13.39, 18.21, 11.57, 14.28, 10.94, 18.83, 15.52, 13.45, 15.25].to_numeric
342
- ds = a.jacknife(log_s2: ->(v) { Math.log(v.variance) })
343
- exp = [1.605, 2.972, 1.151, 3.097, 0.998, 3.308, 0.942, 1.393, 2.416, 2.951, 1.043, 3.806, 3.122, 0.958, 1.362, 0.937].to_numeric
344
-
345
- assert_similar_vector(exp, ds[:log_s2], 0.001)
346
- assert_in_delta(2.00389, ds[:log_s2].mean, 0.00001)
347
- assert_in_delta(1.091, ds[:log_s2].variance, 0.001)
348
- end
349
- should 'jacknife correctly with k>1' do
350
- a = rnorm(6)
351
- ds = a.jacknife(:mean, 2)
352
- mean = a.mean
353
- exp = [3 * mean - 2 * (a[2] + a[3] + a[4] + a[5]) / 4, 3 * mean - 2 * (a[0] + a[1] + a[4] + a[5]) / 4, 3 * mean - 2 * (a[0] + a[1] + a[2] + a[3]) / 4].to_numeric
354
- assert_similar_vector(exp, ds[:mean], 1e-13)
355
- end
356
- should 'bootstrap should return a vector with mean=mu and sd=se' do
357
- a = rnorm(100)
358
- ds = a.bootstrap([:mean, :sd], 200)
359
- se = 1 / Math.sqrt(a.size)
360
- assert_in_delta(0, ds[:mean].mean, 0.3)
361
- assert_in_delta(se, ds[:mean].sd, 0.02)
362
- end
363
- end
364
-
365
- def test_object
366
- assert_equal(@c[1], 5)
367
- assert_equal({ 1 => 1, 2 => 1, 3 => 1, 4 => 1, 5 => 5, 6 => 2, 7 => 1, 8 => 1, 9 => 1, 10 => 1 }, @c.frequencies)
368
- assert_equal({ 1 => 1, 2 => 1, 3 => 1, 4 => 1, 5 => 5, 6 => 2, 7 => 1, 8 => 1, 9 => 1, 10 => 1 }, @c._frequencies)
369
- assert_equal({ 1 => 1.quo(15), 2 => 1.quo(15), 3 => 1.quo(15), 4 => 1.quo(15), 5 => 5.quo(15), 6 => 2.quo(15), 7 => 1.quo(15), 8 => 1.quo(15), 9 => 1.quo(15), 10 => 1.quo(15) }, @c.proportions)
370
- assert_equal(@c.proportion, 1.quo(15))
371
- assert_equal(@c.proportion(2), 1.quo(15))
372
- assert_equal([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], @c.factors.sort)
373
- assert_equal(@c.mode, 5)
374
- assert_equal(@c.n_valid, 15)
375
- end
376
-
377
- def test_equality
378
- v1 = [1, 2, 3].to_vector
379
- v2 = [1, 2, 3].to_vector
380
- assert_equal(v1, v2)
381
- v1 = [1, 2, 3].to_vector(:object)
382
- v2 = [1, 2, 3].to_vector(:numeric)
383
- assert_not_equal(v1, v2)
384
- v2 = [1, 2, 3]
385
- assert_not_equal(v1, v2)
386
- v1 = [1, 2, 3].to_vector
387
- v2 = [1, 2, 3].to_vector
388
- assert_equal(v1, v2)
389
- assert_equal(false, v1 == Object.new)
390
- end
391
-
392
- def test_vector_percentil
393
- a = [1, 2, 2, 3, 4, 5, 5, 5, 6, 10].to_numeric
394
- expected = [10, 25, 25, 40, 50, 70, 70, 70, 90, 100].to_numeric
395
- assert_equal(expected, a.vector_percentil)
396
- a = [1, nil, nil, 2, 2, 3, 4, nil, nil, 5, 5, 5, 6, 10].to_numeric
397
- expected = [10, nil, nil, 25, 25, 40, 50, nil, nil, 70, 70, 70, 90, 100].to_numeric
398
- assert_equal(expected, a.vector_percentil)
399
- end
400
-
401
- def test_numeric
402
- @c.type = :numeric
403
- assert_equal(5, @c.median)
404
- assert_equal(4, @c.percentil(25))
405
- assert_equal(7, @c.percentil(75))
406
-
407
- v = [200_000, 200_000, 210_000, 220_000, 230_000, 250_000, 250_000, 250_000, 270_000, 300_000, 450_000, 130_000, 140_000, 140_000, 140_000, 145_000, 148_000, 165_000, 170_000, 180_000, 180_000, 180_000, 180_000, 180_000, 180_000].to_numeric
408
- assert_equal(180_000, v.median)
409
- a = [7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 9.0, 9.0, 10.0, 10.0, 10.0, 10.0, 10.0, 12.0, 12.0, 13.0, 14.0, 14.0, 2.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0].to_numeric
410
- assert_equal(4.5, a.percentil(25))
411
- assert_equal(6.5, a.percentil(50))
412
- assert_equal(9.5, a.percentil(75))
413
- assert_equal(3.0, a.percentil(10))
414
- end
415
-
416
- def test_linear_percentil_strategy
417
- values = [102, 104, 105, 107, 108, 109, 110, 112, 115, 116].shuffle.to_numeric
418
- assert_equal 102, values.percentil(0, :linear)
419
- assert_equal 104.75, values.percentil(25, :linear)
420
- assert_equal 108.5, values.percentil(50, :linear)
421
- assert_equal 112.75, values.percentil(75, :linear)
422
- assert_equal 116, values.percentil(100, :linear)
423
-
424
- values = [102, 104, 105, 107, 108, 109, 110, 112, 115, 116, 118].shuffle.to_numeric
425
- assert_equal 102, values.percentil(0, :linear)
426
- assert_equal 105, values.percentil(25, :linear)
427
- assert_equal 109, values.percentil(50, :linear)
428
- assert_equal 115, values.percentil(75, :linear)
429
- assert_equal 118, values.percentil(100, :linear)
430
- end
431
-
432
- def test_ranked
433
- v1 = [0.8, 1.2, 1.2, 2.3, 18].to_vector(:numeric)
434
- expected = [1, 2.5, 2.5, 4, 5].to_vector(:numeric)
435
- assert_equal(expected, v1.ranked)
436
- v1 = [nil, 0.8, 1.2, 1.2, 2.3, 18, nil].to_vector(:numeric)
437
- expected = [nil, 1, 2.5, 2.5, 4, 5, nil].to_vector(:numeric)
438
- assert_equal(expected, v1.ranked)
439
- end
440
-
441
- def test_numeric
442
- a = Statsample::Vector.new([1, 2, 3, 4, 'STRING'], :numeric)
443
- assert_equal(10, a.sum)
444
- i = 0
445
- factors = a.factors.sort
446
- [0, 1, 2, 3, 4].each{|v|
447
- assert(v == factors[i])
448
- assert(v.class == factors[i].class, "#{v} - #{v.class} != #{factors[i]} - #{factors[i].class}")
449
- i += 1
450
- }
451
- end
452
-
453
- def test_vector_centered
454
- mean = rand
455
- samples = 11
456
- centered = samples.times.map { |i| i - ((samples / 2).floor).to_i }.to_numeric
457
- not_centered = centered.recode { |v| v + mean }
458
- obs = not_centered.centered
459
- centered.each_with_index do |v, i|
460
- assert_in_delta(v, obs[i], 0.0001)
461
- end
462
- end
463
-
464
- def test_vector_standarized
465
- v1 = [1, 2, 3, 4, nil].to_vector(:numeric)
466
- sds = v1.sds
467
- expected = [((1 - 2.5).quo(sds)), ((2 - 2.5).quo(sds)), ((3 - 2.5).quo(sds)), ((4 - 2.5).quo(sds)), nil].to_vector(:numeric)
468
- vs = v1.vector_standarized
469
- assert_equal(expected, vs)
470
- assert_equal(0, vs.mean)
471
- assert_equal(1, vs.sds)
472
- end
473
-
474
- def test_vector_standarized_with_zero_variance
475
- v1 = 100.times.map { |_i| 1 }.to_numeric
476
- exp = 100.times.map { nil }.to_numeric
477
- assert_equal(exp, v1.standarized)
478
- end
479
-
480
- def test_check_type
481
- v = Statsample::Vector.new
482
- v.type = :object
483
- assert_raise(NoMethodError) { v.check_type(:numeric) }
484
- assert(v.check_type(:object).nil?)
485
-
486
- v.type = :numeric
487
-
488
- assert(v.check_type(:numeric).nil?)
489
- assert(v.check_type(:object).nil?)
108
+ should "show a warning when initializing with :nominal, :numeric or :ordinal" do
109
+ assert_output(nil,"WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\nWARNING: nominal has been deprecated.\n") do
110
+ Statsample::Vector.new [1,2,3,4,5,nil,'hello'], :nominal
111
+ end
490
112
 
491
- v.type = :date
492
- assert_raise(NoMethodError) { v.check_type(:numeric) }
493
- assert_raise(NoMethodError) { v.check_type(:numeric) }
494
- assert_raise(NoMethodError) { v.check_type(:object) }
495
- end
113
+ assert_output(nil,"WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\nWARNING: scale has been deprecated.\n") do
114
+ Statsample::Vector.new [1,2,3,4,nil,5], :scale
115
+ end
496
116
 
497
- def test_add
498
- a = Statsample::Vector.new([1, 2, 3, 4, 5], :numeric)
499
- b = Statsample::Vector.new([11, 12, 13, 14, 15], :numeric)
500
- assert_equal([3, 4, 5, 6, 7], (a + 2).to_a)
501
- assert_equal([12, 14, 16, 18, 20], (a + b).to_a)
502
- assert_raise ArgumentError do
503
- a + @c
504
- end
505
- assert_raise TypeError do
506
- a + 'string'
507
- end
508
- a = Statsample::Vector.new([nil, 1, 2, 3, 4, 5], :numeric)
509
- b = Statsample::Vector.new([11, 12, nil, 13, 14, 15], :numeric)
510
- assert_equal([nil, 13, nil, 16, 18, 20], (a + b).to_a)
511
- assert_equal([nil, 13, nil, 16, 18, 20], (a + b.to_a).to_a)
512
- end
117
+ assert_output(nil,"WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\nWARNING: ordinal has been deprecated.\n") do
118
+ Statsample::Vector.new [1,2,3,4,5], :ordinal
119
+ end
513
120
 
514
- def test_minus
515
- a = Statsample::Vector.new([1, 2, 3, 4, 5], :numeric)
516
- b = Statsample::Vector.new([11, 12, 13, 14, 15], :numeric)
517
- assert_equal([-1, 0, 1, 2, 3], (a - 2).to_a)
518
- assert_equal([10, 10, 10, 10, 10], (b - a).to_a)
519
- assert_raise ArgumentError do
520
- a - @c
521
- end
522
- assert_raise TypeError do
523
- a - 'string'
121
+ assert_output(nil,"WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\n") do
122
+ Statsample::Vector.new_scale 10, 1
123
+ end
524
124
  end
525
- a = Statsample::Vector.new([nil, 1, 2, 3, 4, 5], :numeric)
526
- b = Statsample::Vector.new([11, 12, nil, 13, 14, 15], :numeric)
527
- assert_equal([nil, 11, nil, 10, 10, 10], (b - a).to_a)
528
- assert_equal([nil, 11, nil, 10, 10, 10], (b - a.to_a).to_a)
529
- end
530
125
 
531
- def test_sum_of_squares
532
- a = [1, 2, 3, 4, 5, 6].to_vector(:numeric)
533
- assert_equal(17.5, a.sum_of_squared_deviation)
534
- end
126
+ should "show a warning when Statsample::Vector shorthands are used" do
127
+ numeric = Statsample::Vector.new([1,2,3,4,nil,5], :numeric)
128
+ assert_equal(numeric, [1,2,3,4,nil,5].to_numeric)
129
+ assert_equal(numeric, [1,2,3,4,nil,5].to_vector(:numeric))
535
130
 
536
- def test_average_deviation
537
- a = [1, 2, 3, 4, 5, 6, 7, 8, 9].to_numeric
538
- assert_equal(20.quo(9), a.average_deviation_population)
539
- end
540
-
541
- def test_samples
542
- srand(1)
543
- assert_equal(100, @c.sample_with_replacement(100).size)
544
- assert_equal(@c.valid_data.to_a.sort, @c.sample_without_replacement(15).sort)
545
- assert_raise ArgumentError do
546
- @c.sample_without_replacement(20)
131
+ obj = Statsample::Vector.new([1,2,3,4,'one','two'], :object)
132
+ assert_equal(obj, [1,2,3,4,'one','two'].to_vector(:object))
547
133
  end
548
- @c.type = :numeric
549
- srand(1)
550
- assert_equal(100, @c.sample_with_replacement(100).size)
551
- assert_equal(@c.valid_data.to_a.sort, @c.sample_without_replacement(15).sort)
552
- end
553
-
554
- def test_valid_data
555
- a = Statsample::Vector.new([1, 2, 3, 4, 'STRING'])
556
- a.missing_values = [-99]
557
- a.add(1, false)
558
- a.add(2, false)
559
- a.add(-99, false)
560
- a.set_valid_data
561
- exp_valid_data = [1, 2, 3, 4, 'STRING', 1, 2]
562
- assert_equal(exp_valid_data, a.valid_data)
563
- a.add(20, false)
564
- a.add(30, false)
565
- assert_equal(exp_valid_data, a.valid_data)
566
- a.set_valid_data
567
- exp_valid_data_2 = [1, 2, 3, 4, 'STRING', 1, 2, 20, 30]
568
- assert_equal(exp_valid_data_2, a.valid_data)
569
- end
570
-
571
- def test_set_value
572
- @c[2] = 10
573
- expected = [5, 5, 10, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99].to_vector
574
- assert_equal(expected.data, @c.data)
575
- end
576
134
 
577
- def test_gsl
578
- if Statsample.has_gsl?
579
- a = Statsample::Vector.new([1, 2, 3, 4, 'STRING'], :numeric)
580
-
581
- assert_equal(2, a.mean)
582
- assert_equal(a.variance_sample_ruby, a.variance_sample)
583
- assert_equal(a.standard_deviation_sample_ruby, a.sds)
584
- assert_equal(a.variance_population_ruby, a.variance_population)
585
- assert_equal(a.standard_deviation_population_ruby, a.standard_deviation_population)
586
- assert_nothing_raised do
587
- a = [].to_vector(:numeric)
135
+ should "test that old shorthands show deprecation warnings" do
136
+ assert_output(nil,"WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\n") do
137
+ [1,2,3,4,nil,5].to_scale
588
138
  end
589
- a.add(1, false)
590
- a.add(2, false)
591
- a.set_valid_data
592
- assert_equal(3, a.sum)
593
- b = [1, 2, nil, 3, 4, 5, nil, 6].to_vector(:numeric)
594
- assert_equal(21, b.sum)
595
- assert_equal(3.5, b.mean)
596
- assert_equal(6, b.gsl.size)
597
- c = [10, 20, 30, 40, 50, 100, 1000, 2000, 5000].to_numeric
598
- assert_in_delta(c.skew, c.skew_ruby, 0.0001)
599
- assert_in_delta(c.kurtosis, c.kurtosis_ruby, 0.0001)
600
139
  end
601
140
  end
602
141
 
603
- def test_vector_matrix
604
- v1 = %w(a a a b b b c c).to_vector
605
- v2 = %w(1 3 4 5 6 4 3 2).to_vector
606
- v3 = %w(1 0 0 0 1 1 1 0).to_vector
607
- ex = Matrix.rows([%w(a 1 1), %w(a 3 0), %w(a 4 0), %w(b 5 0), %w(b 6 1), %w(b 4 1), %w(c 3 1), %w(c 2 0)])
608
- assert_equal(ex, Statsample.vector_cols_matrix(v1, v2, v3))
609
- end
610
-
611
- def test_marshalling
612
- v1 = (0..100).to_a.collect { |_n| rand(100) }.to_vector(:numeric)
613
- v2 = Marshal.load(Marshal.dump(v1))
614
- assert_equal(v1, v2)
615
- end
616
-
617
- def test_dup
618
- v1 = %w(a a a b b b c c).to_vector
619
- v2 = v1.dup
620
- assert_equal(v1.data, v2.data)
621
- assert_not_same(v1.data, v2.data)
622
- assert_equal(v1.type, v2.type)
623
-
624
- v1.type = :numeric
625
- assert_not_equal(v1.type, v2.type)
626
- assert_equal(v1.missing_values, v2.missing_values)
627
- assert_not_same(v1.missing_values, v2.missing_values)
628
- assert_equal(v1.labels, v2.labels)
629
- assert_not_same(v1.labels, v2.labels)
630
-
631
- v3 = v1.dup_empty
632
- assert_equal([], v3.data)
633
- assert_not_equal(v1.data, v3.data)
634
- assert_not_same(v1.data, v3.data)
635
- assert_equal(v1.type, v3.type)
636
- v1.type = :numeric
637
- v3.type = :object
638
- assert_not_equal(v1.type, v3.type)
639
- assert_equal(v1.missing_values, v3.missing_values)
640
- assert_not_same(v1.missing_values, v3.missing_values)
641
- assert_equal(v1.labels, v3.labels)
642
- assert_not_same(v1.labels, v3.labels)
643
- end
644
-
645
- def test_paired_ties
646
- a = [0, 0, 0, 1, 1, 2, 3, 3, 4, 4, 4].to_vector(:numeric)
647
- expected = [2, 2, 2, 4.5, 4.5, 6, 7.5, 7.5, 10, 10, 10].to_vector(:numeric)
648
- assert_equal(expected, a.ranked)
649
- end
650
-
651
- def test_dichotomize
652
- a = [0, 0, 0, 1, 2, 3, nil].to_vector
653
- exp = [0, 0, 0, 1, 1, 1, nil].to_numeric
654
- assert_equal(exp, a.dichotomize)
655
- a = [1, 1, 1, 2, 2, 2, 3].to_vector
656
- exp = [0, 0, 0, 1, 1, 1, 1].to_numeric
657
- assert_equal(exp, a.dichotomize)
658
- a = [0, 0, 0, 1, 2, 3, nil].to_vector
659
- exp = [0, 0, 0, 0, 1, 1, nil].to_numeric
660
- assert_equal(exp, a.dichotomize(1))
661
- a = %w(a a a b c d).to_vector
662
- exp = [0, 0, 0, 1, 1, 1].to_numeric
663
- assert_equal(exp, a.dichotomize)
664
- end
665
-
666
- def test_can_be_methods
667
- a = [0, 0, 0, 1, 2, 3, nil].to_vector
668
- assert(a.can_be_numeric?)
669
- a = [0, 's', 0, 1, 2, 3, nil].to_vector
670
- assert(!a.can_be_numeric?)
671
- a.missing_values = ['s']
672
- assert(a.can_be_numeric?)
673
-
674
- a = [Date.new(2009, 10, 10), Date.today, '2009-10-10', '2009-1-1', nil, 'NOW'].to_vector
675
- assert(a.can_be_date?)
676
- a = [Date.new(2009, 10, 10), Date.today, nil, 'sss'].to_vector
677
- assert(!a.can_be_date?)
678
- end
679
-
680
- def test_date_vector
681
- a = [Date.new(2009, 10, 10), :NOW, '2009-10-10', '2009-1-1', nil, 'NOW', 'MISSING'].to_vector(:date, missing_values: ['MISSING'])
682
-
683
- assert(a.type == :date)
684
- expected = [Date.new(2009, 10, 10), Date.today, Date.new(2009, 10, 10), Date.new(2009, 1, 1), nil, Date.today, nil]
685
- assert_equal(expected, a.date_data_with_nils)
142
+ should 'return correct histogram' do
143
+ a = Daru::Vector.new(10.times.map { |v| v })
144
+ hist = a.histogram(2)
145
+ assert_equal([5, 5], hist.bin)
146
+ 3.times do |i|
147
+ assert_in_delta(i * 4.5, hist.get_range(i)[0], 1e-9)
148
+ end
686
149
  end
687
150
  end