statsample 1.5.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (114) hide show
  1. checksums.yaml +4 -4
  2. data/.build.sh +15 -0
  3. data/.gitignore +1 -0
  4. data/.travis.yml +19 -7
  5. data/CONTRIBUTING.md +33 -0
  6. data/History.txt +5 -0
  7. data/README.md +41 -53
  8. data/benchmarks/correlation_matrix_15_variables.rb +6 -5
  9. data/benchmarks/correlation_matrix_5_variables.rb +6 -5
  10. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +23 -26
  11. data/examples/boxplot.rb +17 -5
  12. data/examples/correlation_matrix.rb +36 -7
  13. data/examples/dataset.rb +25 -5
  14. data/examples/dominance_analysis.rb +8 -7
  15. data/examples/dominance_analysis_bootstrap.rb +16 -11
  16. data/examples/histogram.rb +16 -2
  17. data/examples/icc.rb +5 -6
  18. data/examples/levene.rb +17 -3
  19. data/examples/multiple_regression.rb +6 -3
  20. data/examples/parallel_analysis.rb +11 -6
  21. data/examples/polychoric.rb +26 -13
  22. data/examples/principal_axis.rb +8 -4
  23. data/examples/reliability.rb +10 -10
  24. data/examples/scatterplot.rb +8 -0
  25. data/examples/t_test.rb +7 -0
  26. data/examples/u_test.rb +10 -2
  27. data/examples/vector.rb +9 -6
  28. data/examples/velicer_map_test.rb +12 -8
  29. data/lib/statsample.rb +13 -47
  30. data/lib/statsample/analysis/suite.rb +1 -1
  31. data/lib/statsample/anova/oneway.rb +6 -6
  32. data/lib/statsample/anova/twoway.rb +26 -24
  33. data/lib/statsample/bivariate.rb +78 -61
  34. data/lib/statsample/bivariate/pearson.rb +2 -2
  35. data/lib/statsample/codification.rb +45 -32
  36. data/lib/statsample/converter/csv.rb +15 -53
  37. data/lib/statsample/converter/spss.rb +6 -5
  38. data/lib/statsample/converters.rb +50 -211
  39. data/lib/statsample/crosstab.rb +26 -25
  40. data/lib/statsample/daru.rb +117 -0
  41. data/lib/statsample/dataset.rb +70 -942
  42. data/lib/statsample/dominanceanalysis.rb +16 -17
  43. data/lib/statsample/dominanceanalysis/bootstrap.rb +26 -28
  44. data/lib/statsample/factor/parallelanalysis.rb +17 -19
  45. data/lib/statsample/factor/pca.rb +21 -20
  46. data/lib/statsample/factor/principalaxis.rb +3 -3
  47. data/lib/statsample/graph/boxplot.rb +8 -16
  48. data/lib/statsample/graph/histogram.rb +4 -4
  49. data/lib/statsample/graph/scatterplot.rb +8 -7
  50. data/lib/statsample/histogram.rb +128 -119
  51. data/lib/statsample/matrix.rb +20 -16
  52. data/lib/statsample/multiset.rb +39 -38
  53. data/lib/statsample/regression.rb +3 -3
  54. data/lib/statsample/regression/multiple.rb +8 -10
  55. data/lib/statsample/regression/multiple/alglibengine.rb +96 -89
  56. data/lib/statsample/regression/multiple/baseengine.rb +32 -32
  57. data/lib/statsample/regression/multiple/gslengine.rb +33 -36
  58. data/lib/statsample/regression/multiple/matrixengine.rb +7 -9
  59. data/lib/statsample/regression/multiple/rubyengine.rb +39 -41
  60. data/lib/statsample/reliability.rb +23 -25
  61. data/lib/statsample/reliability/icc.rb +8 -7
  62. data/lib/statsample/reliability/multiscaleanalysis.rb +14 -12
  63. data/lib/statsample/reliability/scaleanalysis.rb +58 -60
  64. data/lib/statsample/reliability/skillscaleanalysis.rb +34 -29
  65. data/lib/statsample/resample.rb +1 -1
  66. data/lib/statsample/shorthand.rb +29 -25
  67. data/lib/statsample/test/kolmogorovsmirnov.rb +5 -3
  68. data/lib/statsample/test/levene.rb +28 -27
  69. data/lib/statsample/test/t.rb +7 -9
  70. data/lib/statsample/test/umannwhitney.rb +28 -28
  71. data/lib/statsample/test/wilcoxonsignedrank.rb +45 -43
  72. data/lib/statsample/vector.rb +70 -1013
  73. data/lib/statsample/version.rb +1 -1
  74. data/statsample.gemspec +12 -16
  75. data/test/helpers_tests.rb +1 -1
  76. data/test/test_analysis.rb +17 -17
  77. data/test/test_anova_contrast.rb +6 -6
  78. data/test/test_anovatwowaywithdataset.rb +8 -8
  79. data/test/test_anovawithvectors.rb +8 -8
  80. data/test/test_awesome_print_bug.rb +1 -1
  81. data/test/test_bartlettsphericity.rb +4 -4
  82. data/test/test_bivariate.rb +48 -43
  83. data/test/test_codification.rb +33 -33
  84. data/test/test_crosstab.rb +9 -9
  85. data/test/test_dataset.rb +28 -458
  86. data/test/test_factor.rb +46 -38
  87. data/test/test_factor_pa.rb +22 -13
  88. data/test/test_ggobi.rb +4 -4
  89. data/test/test_gsl.rb +4 -4
  90. data/test/test_histogram.rb +3 -3
  91. data/test/test_matrix.rb +13 -13
  92. data/test/test_multiset.rb +103 -91
  93. data/test/test_regression.rb +57 -52
  94. data/test/test_reliability.rb +55 -45
  95. data/test/test_reliability_icc.rb +8 -8
  96. data/test/test_reliability_skillscale.rb +26 -24
  97. data/test/test_resample.rb +1 -1
  98. data/test/test_statistics.rb +3 -13
  99. data/test/test_stest.rb +9 -9
  100. data/test/test_stratified.rb +3 -3
  101. data/test/test_test_t.rb +12 -12
  102. data/test/test_umannwhitney.rb +2 -2
  103. data/test/test_vector.rb +76 -613
  104. data/test/test_wilcoxonsignedrank.rb +4 -4
  105. metadata +57 -28
  106. data/lib/statsample/rserve_extension.rb +0 -20
  107. data/lib/statsample/vector/gsl.rb +0 -106
  108. data/test/fixtures/repeated_fields.csv +0 -7
  109. data/test/fixtures/scientific_notation.csv +0 -4
  110. data/test/fixtures/test_csv.csv +0 -7
  111. data/test/fixtures/test_xls.xls +0 -0
  112. data/test/test_csv.rb +0 -63
  113. data/test/test_rserve_extension.rb +0 -42
  114. data/test/test_xls.rb +0 -52
@@ -1,3 +1,3 @@
1
1
  module Statsample
2
- VERSION = '1.5.0'
2
+ VERSION = '2.0.0'
3
3
  end
@@ -3,7 +3,7 @@ $:.unshift File.expand_path("../lib/", __FILE__)
3
3
  require 'statsample/version'
4
4
  require 'date'
5
5
 
6
- DESCRIPTION = <<MSG
6
+ Statsample::DESCRIPTION = <<MSG
7
7
  A suite for basic and advanced statistics on Ruby. Tested on CRuby 1.9.3, 2.0.0
8
8
  and 2.1.1. See `.travis.yml` for more information.
9
9
 
@@ -11,7 +11,6 @@ Include:
11
11
 
12
12
  - Descriptive statistics: frequencies, median, mean,
13
13
  standard error, skew, kurtosis (and many others).
14
- - Imports and exports datasets from and to Excel, CSV and plain text files.
15
14
  - Correlations: Pearson's r, Spearman's rank correlation (rho), point biserial,
16
15
  tau a, tau b and gamma. Tetrachoric and Polychoric correlation provides by
17
16
  statsample-bivariate-extension gem.
@@ -32,17 +31,11 @@ scales using factor analysis and correlations, if you want it.
32
31
  - Graphics: Histogram, Boxplot and Scatterplot.
33
32
  MSG
34
33
 
35
- POSTINSTALL = <<MSG
34
+ Statsample::POSTINSTALL = <<MSG
36
35
  ***************************************************
37
36
 
38
37
  Thanks for installing statsample.
39
38
 
40
- On *nix, you could install statsample-optimization
41
- to retrieve gems gsl, statistics2 and a C extension
42
- to speed some methods.
43
-
44
- $ [sudo] gem install statsample-optimization
45
-
46
39
  *****************************************************
47
40
  MSG
48
41
 
@@ -56,8 +49,8 @@ Gem::Specification.new do |s|
56
49
  s.email = ["clbustos@gmail.com", "carlos@onox.com.br"]
57
50
 
58
51
  s.summary = "A suite for basic and advanced statistics on Ruby"
59
- s.description = DESCRIPTION
60
- s.post_install_message = POSTINSTALL
52
+ s.description = Statsample::DESCRIPTION
53
+ s.post_install_message = Statsample::POSTINSTALL
61
54
 
62
55
  s.rdoc_options = ["--main", "README.md"]
63
56
  s.extra_rdoc_files = ["History.txt", "LICENSE.txt", "README.md", "references.txt"]
@@ -67,22 +60,25 @@ Gem::Specification.new do |s|
67
60
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
68
61
  s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
69
62
 
70
- s.add_runtime_dependency 'spreadsheet', '~> 0.6.5'
63
+ s.add_runtime_dependency 'daru', '~> 0.1'
64
+ s.add_runtime_dependency 'spreadsheet', '~> 1.0.3'
71
65
  s.add_runtime_dependency 'reportbuilder', '~> 1.4'
72
66
  s.add_runtime_dependency 'minimization', '~> 0.2'
73
67
  s.add_runtime_dependency 'dirty-memoize', '~> 0.0.4'
74
- s.add_runtime_dependency 'extendmatrix', '~> 0.3'
68
+ s.add_runtime_dependency 'extendmatrix', '~> 0.4'
75
69
  s.add_runtime_dependency 'rserve-client', '~> 0.3'
76
- s.add_runtime_dependency 'rubyvis', '~> 0.5.0'
70
+ s.add_runtime_dependency 'rubyvis', '~> 0.6.1'
77
71
  s.add_runtime_dependency 'distribution', '~> 0.7'
78
72
  s.add_runtime_dependency 'awesome_print', '~> 1.6'
79
73
 
80
- s.add_development_dependency 'bundler', '~> 1.7'
74
+ s.add_development_dependency 'bundler', '~> 1.10'
81
75
  s.add_development_dependency 'rake', '~> 10.4'
82
76
  s.add_development_dependency 'rdoc', '~> 4.2'
83
77
  s.add_development_dependency 'shoulda', '~> 3.5'
84
78
  s.add_development_dependency 'shoulda-matchers', '~> 2.2'
85
- s.add_development_dependency 'minitest', '~> 5.5'
79
+ s.add_development_dependency 'minitest', '~> 5.7'
86
80
  s.add_development_dependency 'gettext', '~> 3.1'
87
81
  s.add_development_dependency 'mocha', '~> 1.1'
82
+ s.add_development_dependency 'nmatrix', '~> 0.1.0'
83
+ s.add_development_dependency 'gsl-nmatrix', '~> 1.17.0'
88
84
  end
@@ -33,7 +33,7 @@ module Minitest
33
33
  def assert_similar_vector(exp, obs, delta = 1e-10, msg = nil)
34
34
  msg ||= "Different vectors #{exp} - #{obs}"
35
35
  assert_equal(exp.size, obs.size)
36
- exp.data_with_nils.each_with_index {|v, i|
36
+ exp.to_a.each_with_index {|v, i|
37
37
  assert_in_delta(v, obs[i], delta)
38
38
  }
39
39
  end
@@ -39,7 +39,7 @@ class StatsampleAnalysisTestCase < Minitest::Test
39
39
  should 'to_text returns the same as a normal ReportBuilder object' do
40
40
  rb = ReportBuilder.new(name: :test)
41
41
  section = ReportBuilder::Section.new(name: 'first')
42
- a = [1, 2, 3].to_numeric
42
+ a = Daru::Vector.new([1, 2, 3])
43
43
  section.add('first')
44
44
  section.add(a)
45
45
  rb.add(section)
@@ -98,8 +98,8 @@ class StatsampleAnalysisTestCase < Minitest::Test
98
98
  end
99
99
  should 'attach() allows to call objects on objects which respond to fields' do
100
100
  an = Statsample::Analysis::Suite.new(:summary)
101
- ds = { 'x' => stub(mean: 10), 'y' => stub(mean: 12) }
102
- ds.expects(:fields).returns(%w(x y)).at_least_once
101
+ ds = { :x => stub(mean: 10), :y => stub(mean: 12) }
102
+ ds.expects(:vectors).returns([:x, :y]).at_least_once
103
103
  an.attach(ds)
104
104
  assert_equal(10, an.x.mean)
105
105
  assert_equal(12, an.y.mean)
@@ -109,10 +109,10 @@ class StatsampleAnalysisTestCase < Minitest::Test
109
109
  end
110
110
  should 'attached objects should be called LIFO' do
111
111
  an = Statsample::Analysis::Suite.new(:summary)
112
- ds1 = { 'x' => stub(mean: 100), 'y' => stub(mean: 120), 'z' => stub(mean: 13) }
113
- ds1.expects(:fields).returns(%w(x y z)).at_least_once
114
- ds2 = { 'x' => stub(mean: 10), 'y' => stub(mean: 12) }
115
- ds2.expects(:fields).returns(%w(x y)).at_least_once
112
+ ds1 = { :x => stub(mean: 100), :y => stub(mean: 120), :z => stub(mean: 13) }
113
+ ds1.expects(:vectors).returns([:x, :y, :z]).at_least_once
114
+ ds2 = { :x => stub(mean: 10), :y => stub(mean: 12) }
115
+ ds2.expects(:vectors).returns([:x, :y]).at_least_once
116
116
  an.attach(ds1)
117
117
  an.attach(ds2)
118
118
  assert_equal(10, an.x.mean)
@@ -122,10 +122,10 @@ class StatsampleAnalysisTestCase < Minitest::Test
122
122
 
123
123
  should 'detach() without arguments drop latest object' do
124
124
  an = Statsample::Analysis::Suite.new(:summary)
125
- ds1 = { 'x' => stub(mean: 100), 'y' => stub(mean: 120), 'z' => stub(mean: 13) }
126
- ds1.expects(:fields).returns(%w(x y z)).at_least_once
127
- ds2 = { 'x' => stub(mean: 10), 'y' => stub(mean: 12) }
128
- ds2.expects(:fields).returns(%w(x y)).at_least_once
125
+ ds1 = { :x => stub(mean: 100), :y => stub(mean: 120), :z => stub(mean: 13) }
126
+ ds1.expects(:vectors).returns([:x, :y, :z]).at_least_once
127
+ ds2 = { :x => stub(mean: 10), :y => stub(mean: 12) }
128
+ ds2.expects(:vectors).returns([:x, :y]).at_least_once
129
129
  an.attach(ds1)
130
130
  an.attach(ds2)
131
131
  assert_equal(10, an.x.mean)
@@ -134,12 +134,12 @@ class StatsampleAnalysisTestCase < Minitest::Test
134
134
  end
135
135
  should 'detach() with argument drop select object' do
136
136
  an = Statsample::Analysis::Suite.new(:summary)
137
- ds1 = { 'x' => 1 }
138
- ds1.expects(:fields).returns(%w(x)).at_least_once
139
- ds2 = { 'x' => 2, 'y' => 3 }
140
- ds2.expects(:fields).returns(%w(x y)).at_least_once
141
- ds3 = { 'y' => 4 }
142
- ds3.expects(:fields).returns(%w(y)).at_least_once
137
+ ds1 = { :x => 1 }
138
+ ds1.expects(:vectors).returns([:x]).at_least_once
139
+ ds2 = { :x => 2, :y => 3 }
140
+ ds2.expects(:vectors).returns([:x, :y]).at_least_once
141
+ ds3 = { :y => 4 }
142
+ ds3.expects(:vectors).returns([:y]).at_least_once
143
143
 
144
144
  an.attach(ds3)
145
145
  an.attach(ds2)
@@ -2,12 +2,12 @@ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
2
  class StatsampleAnovaContrastTestCase < Minitest::Test
3
3
  context(Statsample::Anova::Contrast) do
4
4
  setup do
5
- constant = [12, 13, 11, 12, 12].to_numeric
6
- frequent = [9, 10, 9, 13, 14].to_numeric
7
- infrequent = [15, 16, 17, 16, 16].to_numeric
8
- never = [17, 18, 12, 18, 20].to_numeric
9
- @vectors = [constant, frequent, infrequent, never]
10
- @c = Statsample::Anova::Contrast.new(vectors: @vectors)
5
+ constant = Daru::Vector.new([12, 13, 11, 12, 12])
6
+ frequent = Daru::Vector.new([9, 10, 9, 13, 14])
7
+ infrequent = Daru::Vector.new([15, 16, 17, 16, 16])
8
+ never = Daru::Vector.new([17, 18, 12, 18, 20])
9
+ @vectors = [constant, frequent, infrequent, never]
10
+ @c = Statsample::Anova::Contrast.new(vectors: @vectors)
11
11
  end
12
12
  should 'return correct value using c' do
13
13
  @c.c([1, -1.quo(3), -1.quo(3), -1.quo(3)])
@@ -4,14 +4,14 @@ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
4
4
  class StatsampleAnovaTwoWayWithVectorsTestCase < Minitest::Test
5
5
  context(Statsample::Anova::TwoWayWithVectors) do
6
6
  setup do
7
- @pa = [5, 4, 3, 4, 2, 18, 19, 14, 12, 15, 6, 7, 5, 8, 4, 6, 9, 5, 9, 3].to_numeric
8
- @pa.name = 'Passive Avoidance'
9
- @a = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1].to_vector
10
- @a.labels = { 0 => '0%', 1 => '35%' }
11
- @a.name = 'Diet'
12
- @b = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1].to_vector
13
- @b.labels = { 0 => 'Young', 1 => 'Older' }
14
- @b.name = 'Age'
7
+ @pa = Daru::Vector.new [5, 4, 3, 4, 2, 18, 19, 14, 12, 15, 6, 7, 5, 8, 4, 6, 9, 5, 9, 3]
8
+ @pa.rename 'Passive Avoidance'
9
+ @a = Daru::Vector.new [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
10
+ # @a.labels = { 0 => '0%', 1 => '35%' }
11
+ @a.rename 'Diet'
12
+ @b = Daru::Vector.new [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
13
+ # @b.labels = { 0 => 'Young', 1 => 'Older' }
14
+ @b.rename 'Age'
15
15
  @anova = Statsample::Anova::TwoWayWithVectors.new(a: @a, b: @b, dependent: @pa)
16
16
  end
17
17
  should 'Statsample::Anova respond to #twoway_with_vectors' do
@@ -3,9 +3,9 @@ class StatsampleAnovaOneWayWithVectorsTestCase < Minitest::Test
3
3
  context(Statsample::Anova::OneWayWithVectors) do
4
4
  context('when initializing') do
5
5
  setup do
6
- @v1 = 10.times.map { rand(100) }.to_numeric
7
- @v2 = 10.times.map { rand(100) }.to_numeric
8
- @v3 = 10.times.map { rand(100) }.to_numeric
6
+ @v1 = Daru::Vector.new(10.times.map { rand(100) })
7
+ @v2 = Daru::Vector.new(10.times.map { rand(100) })
8
+ @v3 = Daru::Vector.new(10.times.map { rand(100) })
9
9
  end
10
10
  should 'be the same using [] or args*' do
11
11
  a1 = Statsample::Anova::OneWayWithVectors.new(@v1, @v2, @v3)
@@ -28,9 +28,9 @@ class StatsampleAnovaOneWayWithVectorsTestCase < Minitest::Test
28
28
  end
29
29
  end
30
30
  setup do
31
- @v1 = [3, 3, 2, 3, 6].to_vector(:numeric)
32
- @v2 = [7, 6, 5, 6, 7].to_vector(:numeric)
33
- @v3 = [9, 8, 9, 7, 8].to_vector(:numeric)
31
+ @v1 = Daru::Vector.new([3, 3, 2, 3, 6])
32
+ @v2 = Daru::Vector.new([7, 6, 5, 6, 7])
33
+ @v3 = Daru::Vector.new([9, 8, 9, 7, 8])
34
34
  @name = 'Anova testing'
35
35
  @anova = Statsample::Anova::OneWayWithVectors.new(@v1, @v2, @v3, name: @name)
36
36
  end
@@ -66,10 +66,10 @@ class StatsampleAnovaOneWayWithVectorsTestCase < Minitest::Test
66
66
  assert_in_delta(@anova.sst, @anova.sswg + @anova.ssbg, 0.00001)
67
67
  end
68
68
  should 'df total equal to number of n-1' do
69
- assert_equal(@v1.n + @v2.n + @v3.n - 1, @anova.df_total)
69
+ assert_equal(@v1.size + @v2.size + @v3.size - 1, @anova.df_total)
70
70
  end
71
71
  should 'df wg equal to number of n-k' do
72
- assert_equal(@v1.n + @v2.n + @v3.n - 3, @anova.df_wg)
72
+ assert_equal(@v1.size + @v2.size + @v3.size - 3, @anova.df_wg)
73
73
  end
74
74
  should 'df bg equal to number of k-1' do
75
75
  assert_equal(2, @anova.df_bg)
@@ -5,7 +5,7 @@ class StatsampleAwesomePrintBug < Minitest::Test
5
5
  require 'awesome_print'
6
6
  end
7
7
  should 'should be flawless' do
8
- a = [1, 2, 3].to_numeric
8
+ a = Daru::Vector.new([1, 2, 3])
9
9
 
10
10
  assert(a != [1, 2, 3])
11
11
  assert_nothing_raised do
@@ -4,11 +4,11 @@ class StatsampleBartlettSphericityTestCase < Minitest::Test
4
4
  include Statsample::Test
5
5
  context Statsample::Test::BartlettSphericity do
6
6
  setup do
7
- @v1 = [1, 2, 3, 4, 7, 8, 9, 10, 14, 15, 20, 50, 60, 70].to_numeric
8
- @v2 = [5, 6, 11, 12, 13, 16, 17, 18, 19, 20, 30, 0, 0, 0].to_numeric
9
- @v3 = [10, 3, 20, 30, 40, 50, 80, 10, 20, 30, 40, 2, 3, 4].to_numeric
7
+ @v1 = Daru::Vector.new([1, 2, 3, 4, 7, 8, 9, 10, 14, 15, 20, 50, 60, 70])
8
+ @v2 = Daru::Vector.new([5, 6, 11, 12, 13, 16, 17, 18, 19, 20, 30, 0, 0, 0])
9
+ @v3 = Daru::Vector.new([10, 3, 20, 30, 40, 50, 80, 10, 20, 30, 40, 2, 3, 4])
10
10
  # KMO: 0.490
11
- ds = { 'v1' => @v1, 'v2' => @v2, 'v3' => @v3 }.to_dataset
11
+ ds = Daru::DataFrame.new({ :v1 => @v1, :v2 => @v2, :v3 => @v3 })
12
12
  cor = Statsample::Bivariate.correlation_matrix(ds)
13
13
  @bs = Statsample::Test::BartlettSphericity.new(cor, 14)
14
14
  end
@@ -1,38 +1,38 @@
1
1
  require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
2
  class StatsampleBivariateTestCase < Minitest::Test
3
3
  should 'method sum of squares should be correct' do
4
- v1 = [1, 2, 3, 4, 5, 6].to_vector(:numeric)
5
- v2 = [6, 2, 4, 10, 12, 8].to_vector(:numeric)
4
+ v1 = Daru::Vector.new([1, 2, 3, 4, 5, 6])
5
+ v2 = Daru::Vector.new([6, 2, 4, 10, 12, 8])
6
6
  assert_equal(23.0, Statsample::Bivariate.sum_of_squares(v1, v2))
7
7
  end
8
8
  should_with_gsl 'return same covariance with ruby and gls implementation' do
9
- v1 = 20.times.collect { |_a| rand }.to_numeric
10
- v2 = 20.times.collect { |_a| rand }.to_numeric
9
+ v1 = Daru::Vector.new(20.times.collect { |_a| rand })
10
+ v2 = Daru::Vector.new(20.times.collect { |_a| rand })
11
11
  assert_in_delta(Statsample::Bivariate.covariance(v1, v2), Statsample::Bivariate.covariance_slow(v1, v2), 0.001)
12
12
  end
13
13
 
14
14
  should_with_gsl 'return same correlation with ruby and gls implementation' do
15
- v1 = 20.times.collect { |_a| rand }.to_numeric
16
- v2 = 20.times.collect { |_a| rand }.to_numeric
15
+ v1 = Daru::Vector.new(20.times.collect { |_a| rand })
16
+ v2 = Daru::Vector.new(20.times.collect { |_a| rand })
17
17
 
18
- assert_in_delta(GSL::Stats.correlation(v1.gsl, v2.gsl), Statsample::Bivariate.pearson_slow(v1, v2), 1e-10)
18
+ assert_in_delta(GSL::Stats.correlation(v1.to_gsl, v2.to_gsl), Statsample::Bivariate.pearson_slow(v1, v2), 1e-10)
19
19
  end
20
20
  should 'return correct pearson correlation' do
21
- v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:numeric)
22
- v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:numeric)
21
+ v1 = Daru::Vector.new([6, 5, 4, 7, 8, 4, 3, 2])
22
+ v2 = Daru::Vector.new([2, 3, 7, 8, 6, 4, 3, 2])
23
23
  assert_in_delta(0.525, Statsample::Bivariate.pearson(v1, v2), 0.001)
24
24
  assert_in_delta(0.525, Statsample::Bivariate.pearson_slow(v1, v2), 0.001)
25
25
 
26
- v3 = [6, 2, 1000, 1000, 5, 4, 7, 8, 4, 3, 2, nil].to_vector(:numeric)
27
- v4 = [2, nil, nil, nil, 3, 7, 8, 6, 4, 3, 2, 500].to_vector(:numeric)
26
+ v3 = Daru::Vector.new([6, 2, 1000, 1000, 5, 4, 7, 8, 4, 3, 2, nil])
27
+ v4 = Daru::Vector.new([2, nil, nil, nil, 3, 7, 8, 6, 4, 3, 2, 500])
28
28
  assert_in_delta(0.525, Statsample::Bivariate.pearson(v3, v4), 0.001)
29
29
  # Test ruby method
30
30
  v3a, v4a = Statsample.only_valid v3, v4
31
31
  assert_in_delta(0.525, Statsample::Bivariate.pearson_slow(v3a, v4a), 0.001)
32
32
  end
33
33
  should 'return correct values for t_pearson and prop_pearson' do
34
- v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:numeric)
35
- v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:numeric)
34
+ v1 = Daru::Vector.new([6, 5, 4, 7, 8, 4, 3, 2])
35
+ v2 = Daru::Vector.new([2, 3, 7, 8, 6, 4, 3, 2])
36
36
  r = Statsample::Bivariate::Pearson.new(v1, v2)
37
37
  assert_in_delta(0.525, r.r, 0.001)
38
38
  assert_in_delta(Statsample::Bivariate.t_pearson(v1, v2), r.t, 0.001)
@@ -40,11 +40,11 @@ class StatsampleBivariateTestCase < Minitest::Test
40
40
  assert(r.summary.size > 0)
41
41
  end
42
42
  should 'return correct correlation_matrix with nils values' do
43
- v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:numeric)
44
- v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:numeric)
45
- v3 = [6, 2, 1000, 1000, 5, 4, 7, 8].to_vector(:numeric)
46
- v4 = [2, nil, nil, nil, 3, 7, 8, 6].to_vector(:numeric)
47
- ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4 }.to_dataset
43
+ v1 = Daru::Vector.new([6, 5, 4, 7, 8, 4, 3, 2])
44
+ v2 = Daru::Vector.new([2, 3, 7, 8, 6, 4, 3, 2])
45
+ v3 = Daru::Vector.new([6, 2, 1000, 1000, 5, 4, 7, 8])
46
+ v4 = Daru::Vector.new([2, nil, nil, nil, 3, 7, 8, 6])
47
+ ds = Daru::DataFrame.new({ :v1 => v1, :v2 => v2, :v3 => v3, :v4 => v4 })
48
48
  c = proc { |n1, n2| Statsample::Bivariate.pearson(n1, n2) }
49
49
  expected = Matrix[[c.call(v1, v1), c.call(v1, v2), c.call(v1, v3), c.call(v1, v4)], [c.call(v2, v1), c.call(v2, v2), c.call(v2, v3), c.call(v2, v4)], [c.call(v3, v1), c.call(v3, v2), c.call(v3, v3), c.call(v3, v4)],
50
50
  [c.call(v4, v1), c.call(v4, v2), c.call(v4, v3), c.call(v4, v4)]
@@ -61,13 +61,13 @@ class StatsampleBivariateTestCase < Minitest::Test
61
61
  end
62
62
  should_with_gsl 'return same values for optimized and pairwise covariance matrix' do
63
63
  cases = 100
64
- v1 = Statsample::Vector.new_numeric(cases) { rand }
65
- v2 = Statsample::Vector.new_numeric(cases) { rand }
66
- v3 = Statsample::Vector.new_numeric(cases) { rand }
67
- v4 = Statsample::Vector.new_numeric(cases) { rand }
68
- v5 = Statsample::Vector.new_numeric(cases) { rand }
64
+ v1 = Daru::Vector.new_with_size(cases) { rand }
65
+ v2 = Daru::Vector.new_with_size(cases) { rand }
66
+ v3 = Daru::Vector.new_with_size(cases) { rand }
67
+ v4 = Daru::Vector.new_with_size(cases) { rand }
68
+ v5 = Daru::Vector.new_with_size(cases) { rand }
69
69
 
70
- ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4, 'v5' => v5 }.to_dataset
70
+ ds = Daru::DataFrame.new({ :v1 => v1, :v2 => v2, :v3 => v3, :v4 => v4, :v5 => v5 })
71
71
 
72
72
  cor_opt = Statsample::Bivariate.covariance_matrix_optimized(ds)
73
73
 
@@ -76,13 +76,14 @@ class StatsampleBivariateTestCase < Minitest::Test
76
76
  end
77
77
  should_with_gsl 'return same values for optimized and pairwise correlation matrix' do
78
78
  cases = 100
79
- v1 = Statsample::Vector.new_numeric(cases) { rand }
80
- v2 = Statsample::Vector.new_numeric(cases) { rand }
81
- v3 = Statsample::Vector.new_numeric(cases) { rand }
82
- v4 = Statsample::Vector.new_numeric(cases) { rand }
83
- v5 = Statsample::Vector.new_numeric(cases) { rand }
79
+ v1 = Daru::Vector.new_with_size(cases) { rand }
80
+ v2 = Daru::Vector.new_with_size(cases) { rand }
81
+ v3 = Daru::Vector.new_with_size(cases) { rand }
82
+ v4 = Daru::Vector.new_with_size(cases) { rand }
83
+ v5 = Daru::Vector.new_with_size(cases) { rand }
84
84
 
85
- ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4, 'v5' => v5 }.to_dataset
85
+ ds = Daru::DataFrame.new({
86
+ :v1 => v1, :v2 => v2, :v3 => v3, :v4 => v4, :v5 => v5 })
86
87
 
87
88
  cor_opt = Statsample::Bivariate.correlation_matrix_optimized(ds)
88
89
 
@@ -90,11 +91,11 @@ class StatsampleBivariateTestCase < Minitest::Test
90
91
  assert_equal_matrix(cor_opt, cor_pw, 1e-15)
91
92
  end
92
93
  should 'return correct correlation_matrix without nils values' do
93
- v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:numeric)
94
- v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:numeric)
95
- v3 = [6, 2, 1000, 1000, 5, 4, 7, 8].to_vector(:numeric)
96
- v4 = [2, 4, 6, 7, 3, 7, 8, 6].to_vector(:numeric)
97
- ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4 }.to_dataset
94
+ v1 = Daru::Vector.new([6, 5, 4, 7, 8, 4, 3, 2])
95
+ v2 = Daru::Vector.new([2, 3, 7, 8, 6, 4, 3, 2])
96
+ v3 = Daru::Vector.new([6, 2, 1000, 1000, 5, 4, 7, 8])
97
+ v4 = Daru::Vector.new([2, 4, 6, 7, 3, 7, 8, 6])
98
+ ds = Daru::DataFrame.new({ :v1 => v1, :v2 => v2, :v3 => v3, :v4 => v4 })
98
99
  c = proc { |n1, n2| Statsample::Bivariate.pearson(n1, n2) }
99
100
  expected = Matrix[[c.call(v1, v1), c.call(v1, v2), c.call(v1, v3), c.call(v1, v4)], [c.call(v2, v1), c.call(v2, v2), c.call(v2, v3), c.call(v2, v4)], [c.call(v3, v1), c.call(v3, v2), c.call(v3, v3), c.call(v3, v4)],
100
101
  [c.call(v4, v1), c.call(v4, v2), c.call(v4, v3), c.call(v4, v4)]
@@ -129,25 +130,25 @@ class StatsampleBivariateTestCase < Minitest::Test
129
130
  end
130
131
 
131
132
  should "return correct value for Spearman's rho" do
132
- v1 = [86, 97, 99, 100, 101, 103, 106, 110, 112, 113].to_vector(:numeric)
133
- v2 = [0, 20, 28, 27, 50, 29, 7, 17, 6, 12].to_vector(:numeric)
133
+ v1 =Daru::Vector.new( [86, 97, 99, 100, 101, 103, 106, 110, 112, 113])
134
+ v2 =Daru::Vector.new( [0, 20, 28, 27, 50, 29, 7, 17, 6, 12])
134
135
  assert_in_delta(-0.175758, Statsample::Bivariate.spearman(v1, v2), 0.0001)
135
136
  end
136
137
  should 'return correct value for point_biserial correlation' do
137
- c = [1, 3, 5, 6, 7, 100, 200, 300, 400, 300].to_vector(:numeric)
138
- d = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0].to_vector(:numeric)
138
+ c = Daru::Vector.new([1, 3, 5, 6, 7, 100, 200, 300, 400, 300])
139
+ d = Daru::Vector.new([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
139
140
  assert_raises TypeError do
140
141
  Statsample::Bivariate.point_biserial(c, d)
141
142
  end
142
143
  assert_in_delta(Statsample::Bivariate.point_biserial(d, c), Statsample::Bivariate.pearson(d, c), 0.0001)
143
144
  end
144
145
  should 'return correct value for tau_a and tau_b' do
145
- v1 = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11].to_vector(:numeric)
146
- v2 = [1, 3, 4, 5, 7, 8, 2, 9, 10, 6, 11].to_vector(:numeric)
146
+ v1 = Daru::Vector.new([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
147
+ v2 = Daru::Vector.new([1, 3, 4, 5, 7, 8, 2, 9, 10, 6, 11])
147
148
  assert_in_delta(0.6727, Statsample::Bivariate.tau_a(v1, v2), 0.001)
148
149
  assert_in_delta(0.6727, Statsample::Bivariate.tau_b((Statsample::Crosstab.new(v1, v2).to_matrix)), 0.001)
149
- v1 = [12, 14, 14, 17, 19, 19, 19, 19, 19, 20, 21, 21, 21, 21, 21, 22, 23, 24, 24, 24, 26, 26, 27].to_vector(:numeric)
150
- v2 = [11, 4, 4, 2, 0, 0, 0, 0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0].to_vector(:numeric)
150
+ v1 = Daru::Vector.new([12, 14, 14, 17, 19, 19, 19, 19, 19, 20, 21, 21, 21, 21, 21, 22, 23, 24, 24, 24, 26, 26, 27])
151
+ v2 = Daru::Vector.new([11, 4, 4, 2, 0, 0, 0, 0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0])
151
152
  assert_in_delta(-0.376201540231705, Statsample::Bivariate.tau_b(Statsample::Crosstab.new(v1, v2).to_matrix), 0.001)
152
153
  end
153
154
  should 'return correct value for gamma correlation' do
@@ -156,4 +157,8 @@ class StatsampleBivariateTestCase < Minitest::Test
156
157
  m2 = Matrix[[15, 12, 6, 5], [12, 8, 10, 8], [4, 6, 9, 10]]
157
158
  assert_in_delta(0.349, Statsample::Bivariate.gamma(m2), 0.001)
158
159
  end
160
+
161
+ should 'return correct residuals' do
162
+ # TODO: test Statsample::Bivariate.residuals
163
+ end
159
164
  end
@@ -1,33 +1,33 @@
1
1
  require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
2
  class StatsampleCodificationTestCase < Minitest::Test
3
3
  def initialize(*args)
4
- v1 = %w(run walk,run walking running sleep sleeping,dreaming sleep,dream).to_vector
4
+ v1 = Daru::Vector.new(%w(run walk,run walking running sleep sleeping,dreaming sleep,dream))
5
5
  @dict = { 'run' => 'r', 'walk' => 'w', 'walking' => 'w', 'running' => 'r', 'sleep' => 's', 'sleeping' => 's', 'dream' => 'd', 'dreaming' => 'd' }
6
- @ds = { 'v1' => v1 }.to_dataset
6
+ @ds = Daru::DataFrame.new({ :v1 => v1 })
7
7
  super
8
8
  end
9
9
 
10
10
  def test_create_hash
11
11
  expected_keys_v1 = %w(run walk walking running sleep sleeping dream dreaming).sort
12
- hash = Statsample::Codification.create_hash(@ds, ['v1'])
13
- assert_equal(['v1'], hash.keys)
14
- assert_equal(expected_keys_v1, hash['v1'].keys.sort)
15
- assert_equal(expected_keys_v1, hash['v1'].values.sort)
12
+ hash = Statsample::Codification.create_hash(@ds, [:v1])
13
+ assert_equal([:v1], hash.keys)
14
+ assert_equal(expected_keys_v1, hash[:v1].keys.sort)
15
+ assert_equal(expected_keys_v1, hash[:v1].values.sort)
16
16
  end
17
17
 
18
18
  def test_create_excel
19
19
  filename = Dir.tmpdir + '/test_excel' + Time.now.to_s + '.xls'
20
20
  # filename = Tempfile.new("test_codification_"+Time.now().to_s)
21
21
  Statsample::Codification.create_excel(@ds, ['v1'], filename)
22
- field = (['v1'] * 8).to_vector
23
- keys = %w(dream dreaming run running sleep sleeping walk walking).to_vector
24
- ds = Statsample::Excel.read(filename)
25
- assert_equal(field, ds['field'])
26
- assert_equal(keys, ds['original'])
27
- assert_equal(keys, ds['recoded'])
22
+ field = Daru::Vector.new(['v1'] * 8, name: :field)
23
+ keys = Daru::Vector.new(%w(dream dreaming run running sleep sleeping walk walking))
24
+ ds = Daru::DataFrame.from_excel(filename)
25
+ assert_equal(field, ds[:field])
26
+ assert_equal(keys, ds[:original])
27
+ assert_equal(keys, ds[:recoded])
28
28
  hash = Statsample::Codification.excel_to_recoded_hash(filename)
29
- assert_equal(keys.data, hash['v1'].keys.sort)
30
- assert_equal(keys.data, hash['v1'].values.sort)
29
+ assert_equal(keys.to_a, hash[:v1].keys.sort)
30
+ assert_equal(keys.to_a, hash[:v1].values.sort)
31
31
  end
32
32
 
33
33
  def test_create_yaml
@@ -35,44 +35,44 @@ class StatsampleCodificationTestCase < Minitest::Test
35
35
  Statsample::Codification.create_yaml(@ds, [])
36
36
  end
37
37
  expected_keys_v1 = %w(run walk walking running sleep sleeping dream dreaming).sort
38
- yaml_hash = Statsample::Codification.create_yaml(@ds, ['v1'])
38
+ yaml_hash = Statsample::Codification.create_yaml(@ds, [:v1])
39
39
  h = YAML.load(yaml_hash)
40
- assert_equal(['v1'], h.keys)
41
- assert_equal(expected_keys_v1, h['v1'].keys.sort)
40
+ assert_equal([:v1], h.keys)
41
+ assert_equal(expected_keys_v1, h[:v1].keys.sort)
42
42
  tf = Tempfile.new('test_codification')
43
- yaml_hash = Statsample::Codification.create_yaml(@ds, ['v1'], tf, Statsample::SPLIT_TOKEN)
43
+ yaml_hash = Statsample::Codification.create_yaml(@ds, [:v1], tf, Statsample::SPLIT_TOKEN)
44
44
  tf.close
45
45
  tf.open
46
46
  h = YAML.load(tf)
47
- assert_equal(['v1'], h.keys)
48
- assert_equal(expected_keys_v1, h['v1'].keys.sort)
47
+ assert_equal([:v1], h.keys)
48
+ assert_equal(expected_keys_v1, h[:v1].keys.sort)
49
49
  tf.close(true)
50
50
  end
51
51
 
52
52
  def test_recodification
53
53
  expected = [['r'], %w(w r), ['w'], ['r'], ['s'], %w(s d), %w(s d)]
54
- assert_equal(expected, Statsample::Codification.recode_vector(@ds['v1'], @dict))
55
- v2 = ['run', 'walk,dreaming', nil, 'walk,dream,dreaming,walking'].to_vector
54
+ assert_equal(expected, Statsample::Codification.recode_vector(@ds[:v1], @dict))
55
+ v2 = Daru::Vector.new(['run', 'walk,dreaming', nil, 'walk,dream,dreaming,walking'])
56
56
  expected = [['r'], %w(w d), nil, %w(w d)]
57
57
  assert_equal(expected, Statsample::Codification.recode_vector(v2, @dict))
58
58
  end
59
59
 
60
60
  def test_recode_dataset_simple
61
- Statsample::Codification.recode_dataset_simple!(@ds, 'v1' => @dict)
62
- expected_vector = ['r', 'w,r', 'w', 'r', 's', 's,d', 's,d'].to_vector
63
- assert_not_equal(expected_vector, @ds['v1'])
64
- assert_equal(expected_vector, @ds['v1_recoded'])
61
+ Statsample::Codification.recode_dataset_simple!(@ds, :v1 => @dict)
62
+ expected_vector = Daru::Vector.new(['r', 'w,r', 'w', 'r', 's', 's,d', 's,d'])
63
+ assert_not_equal(expected_vector, @ds[:v1])
64
+ assert_equal(expected_vector, @ds[:v1_recoded])
65
65
  end
66
66
 
67
67
  def test_recode_dataset_split
68
- Statsample::Codification.recode_dataset_split!(@ds, 'v1' => @dict)
68
+ Statsample::Codification.recode_dataset_split!(@ds, :v1 => @dict)
69
69
  e = {}
70
- e['r'] = [1, 1, 0, 1, 0, 0, 0].to_vector
71
- e['w'] = [0, 1, 1, 0, 0, 0, 0].to_vector
72
- e['s'] = [0, 0, 0, 0, 1, 1, 1].to_vector
73
- e['d'] = [0, 0, 0, 0, 0, 1, 1].to_vector
74
- e.each{|k, expected|
75
- assert_equal(expected, @ds['v1_' + k], "Error on key #{k}")
70
+ e['r'] = Daru::Vector.new([1, 1, 0, 1, 0, 0, 0])
71
+ e['w'] = Daru::Vector.new([0, 1, 1, 0, 0, 0, 0])
72
+ e['s'] = Daru::Vector.new([0, 0, 0, 0, 1, 1, 1])
73
+ e['d'] = Daru::Vector.new([0, 0, 0, 0, 0, 1, 1])
74
+ e.each { |k, expected|
75
+ assert_equal(expected, @ds[('v1_' + k).to_sym], "Error on key #{k}")
76
76
  }
77
77
  end
78
78
  end