statsample 1.5.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. checksums.yaml +4 -4
  2. data/.build.sh +15 -0
  3. data/.gitignore +1 -0
  4. data/.travis.yml +19 -7
  5. data/CONTRIBUTING.md +33 -0
  6. data/History.txt +5 -0
  7. data/README.md +41 -53
  8. data/benchmarks/correlation_matrix_15_variables.rb +6 -5
  9. data/benchmarks/correlation_matrix_5_variables.rb +6 -5
  10. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +23 -26
  11. data/examples/boxplot.rb +17 -5
  12. data/examples/correlation_matrix.rb +36 -7
  13. data/examples/dataset.rb +25 -5
  14. data/examples/dominance_analysis.rb +8 -7
  15. data/examples/dominance_analysis_bootstrap.rb +16 -11
  16. data/examples/histogram.rb +16 -2
  17. data/examples/icc.rb +5 -6
  18. data/examples/levene.rb +17 -3
  19. data/examples/multiple_regression.rb +6 -3
  20. data/examples/parallel_analysis.rb +11 -6
  21. data/examples/polychoric.rb +26 -13
  22. data/examples/principal_axis.rb +8 -4
  23. data/examples/reliability.rb +10 -10
  24. data/examples/scatterplot.rb +8 -0
  25. data/examples/t_test.rb +7 -0
  26. data/examples/u_test.rb +10 -2
  27. data/examples/vector.rb +9 -6
  28. data/examples/velicer_map_test.rb +12 -8
  29. data/lib/statsample.rb +13 -47
  30. data/lib/statsample/analysis/suite.rb +1 -1
  31. data/lib/statsample/anova/oneway.rb +6 -6
  32. data/lib/statsample/anova/twoway.rb +26 -24
  33. data/lib/statsample/bivariate.rb +78 -61
  34. data/lib/statsample/bivariate/pearson.rb +2 -2
  35. data/lib/statsample/codification.rb +45 -32
  36. data/lib/statsample/converter/csv.rb +15 -53
  37. data/lib/statsample/converter/spss.rb +6 -5
  38. data/lib/statsample/converters.rb +50 -211
  39. data/lib/statsample/crosstab.rb +26 -25
  40. data/lib/statsample/daru.rb +117 -0
  41. data/lib/statsample/dataset.rb +70 -942
  42. data/lib/statsample/dominanceanalysis.rb +16 -17
  43. data/lib/statsample/dominanceanalysis/bootstrap.rb +26 -28
  44. data/lib/statsample/factor/parallelanalysis.rb +17 -19
  45. data/lib/statsample/factor/pca.rb +21 -20
  46. data/lib/statsample/factor/principalaxis.rb +3 -3
  47. data/lib/statsample/graph/boxplot.rb +8 -16
  48. data/lib/statsample/graph/histogram.rb +4 -4
  49. data/lib/statsample/graph/scatterplot.rb +8 -7
  50. data/lib/statsample/histogram.rb +128 -119
  51. data/lib/statsample/matrix.rb +20 -16
  52. data/lib/statsample/multiset.rb +39 -38
  53. data/lib/statsample/regression.rb +3 -3
  54. data/lib/statsample/regression/multiple.rb +8 -10
  55. data/lib/statsample/regression/multiple/alglibengine.rb +96 -89
  56. data/lib/statsample/regression/multiple/baseengine.rb +32 -32
  57. data/lib/statsample/regression/multiple/gslengine.rb +33 -36
  58. data/lib/statsample/regression/multiple/matrixengine.rb +7 -9
  59. data/lib/statsample/regression/multiple/rubyengine.rb +39 -41
  60. data/lib/statsample/reliability.rb +23 -25
  61. data/lib/statsample/reliability/icc.rb +8 -7
  62. data/lib/statsample/reliability/multiscaleanalysis.rb +14 -12
  63. data/lib/statsample/reliability/scaleanalysis.rb +58 -60
  64. data/lib/statsample/reliability/skillscaleanalysis.rb +34 -29
  65. data/lib/statsample/resample.rb +1 -1
  66. data/lib/statsample/shorthand.rb +29 -25
  67. data/lib/statsample/test/kolmogorovsmirnov.rb +5 -3
  68. data/lib/statsample/test/levene.rb +28 -27
  69. data/lib/statsample/test/t.rb +7 -9
  70. data/lib/statsample/test/umannwhitney.rb +28 -28
  71. data/lib/statsample/test/wilcoxonsignedrank.rb +45 -43
  72. data/lib/statsample/vector.rb +70 -1013
  73. data/lib/statsample/version.rb +1 -1
  74. data/statsample.gemspec +12 -16
  75. data/test/helpers_tests.rb +1 -1
  76. data/test/test_analysis.rb +17 -17
  77. data/test/test_anova_contrast.rb +6 -6
  78. data/test/test_anovatwowaywithdataset.rb +8 -8
  79. data/test/test_anovawithvectors.rb +8 -8
  80. data/test/test_awesome_print_bug.rb +1 -1
  81. data/test/test_bartlettsphericity.rb +4 -4
  82. data/test/test_bivariate.rb +48 -43
  83. data/test/test_codification.rb +33 -33
  84. data/test/test_crosstab.rb +9 -9
  85. data/test/test_dataset.rb +28 -458
  86. data/test/test_factor.rb +46 -38
  87. data/test/test_factor_pa.rb +22 -13
  88. data/test/test_ggobi.rb +4 -4
  89. data/test/test_gsl.rb +4 -4
  90. data/test/test_histogram.rb +3 -3
  91. data/test/test_matrix.rb +13 -13
  92. data/test/test_multiset.rb +103 -91
  93. data/test/test_regression.rb +57 -52
  94. data/test/test_reliability.rb +55 -45
  95. data/test/test_reliability_icc.rb +8 -8
  96. data/test/test_reliability_skillscale.rb +26 -24
  97. data/test/test_resample.rb +1 -1
  98. data/test/test_statistics.rb +3 -13
  99. data/test/test_stest.rb +9 -9
  100. data/test/test_stratified.rb +3 -3
  101. data/test/test_test_t.rb +12 -12
  102. data/test/test_umannwhitney.rb +2 -2
  103. data/test/test_vector.rb +76 -613
  104. data/test/test_wilcoxonsignedrank.rb +4 -4
  105. metadata +57 -28
  106. data/lib/statsample/rserve_extension.rb +0 -20
  107. data/lib/statsample/vector/gsl.rb +0 -106
  108. data/test/fixtures/repeated_fields.csv +0 -7
  109. data/test/fixtures/scientific_notation.csv +0 -4
  110. data/test/fixtures/test_csv.csv +0 -7
  111. data/test/fixtures/test_xls.xls +0 -0
  112. data/test/test_csv.rb +0 -63
  113. data/test/test_rserve_extension.rb +0 -42
  114. data/test/test_xls.rb +0 -52
@@ -1,3 +1,3 @@
1
1
  module Statsample
2
- VERSION = '1.5.0'
2
+ VERSION = '2.0.0'
3
3
  end
@@ -3,7 +3,7 @@ $:.unshift File.expand_path("../lib/", __FILE__)
3
3
  require 'statsample/version'
4
4
  require 'date'
5
5
 
6
- DESCRIPTION = <<MSG
6
+ Statsample::DESCRIPTION = <<MSG
7
7
  A suite for basic and advanced statistics on Ruby. Tested on CRuby 1.9.3, 2.0.0
8
8
  and 2.1.1. See `.travis.yml` for more information.
9
9
 
@@ -11,7 +11,6 @@ Include:
11
11
 
12
12
  - Descriptive statistics: frequencies, median, mean,
13
13
  standard error, skew, kurtosis (and many others).
14
- - Imports and exports datasets from and to Excel, CSV and plain text files.
15
14
  - Correlations: Pearson's r, Spearman's rank correlation (rho), point biserial,
16
15
  tau a, tau b and gamma. Tetrachoric and Polychoric correlation provides by
17
16
  statsample-bivariate-extension gem.
@@ -32,17 +31,11 @@ scales using factor analysis and correlations, if you want it.
32
31
  - Graphics: Histogram, Boxplot and Scatterplot.
33
32
  MSG
34
33
 
35
- POSTINSTALL = <<MSG
34
+ Statsample::POSTINSTALL = <<MSG
36
35
  ***************************************************
37
36
 
38
37
  Thanks for installing statsample.
39
38
 
40
- On *nix, you could install statsample-optimization
41
- to retrieve gems gsl, statistics2 and a C extension
42
- to speed some methods.
43
-
44
- $ [sudo] gem install statsample-optimization
45
-
46
39
  *****************************************************
47
40
  MSG
48
41
 
@@ -56,8 +49,8 @@ Gem::Specification.new do |s|
56
49
  s.email = ["clbustos@gmail.com", "carlos@onox.com.br"]
57
50
 
58
51
  s.summary = "A suite for basic and advanced statistics on Ruby"
59
- s.description = DESCRIPTION
60
- s.post_install_message = POSTINSTALL
52
+ s.description = Statsample::DESCRIPTION
53
+ s.post_install_message = Statsample::POSTINSTALL
61
54
 
62
55
  s.rdoc_options = ["--main", "README.md"]
63
56
  s.extra_rdoc_files = ["History.txt", "LICENSE.txt", "README.md", "references.txt"]
@@ -67,22 +60,25 @@ Gem::Specification.new do |s|
67
60
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
68
61
  s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
69
62
 
70
- s.add_runtime_dependency 'spreadsheet', '~> 0.6.5'
63
+ s.add_runtime_dependency 'daru', '~> 0.1'
64
+ s.add_runtime_dependency 'spreadsheet', '~> 1.0.3'
71
65
  s.add_runtime_dependency 'reportbuilder', '~> 1.4'
72
66
  s.add_runtime_dependency 'minimization', '~> 0.2'
73
67
  s.add_runtime_dependency 'dirty-memoize', '~> 0.0.4'
74
- s.add_runtime_dependency 'extendmatrix', '~> 0.3'
68
+ s.add_runtime_dependency 'extendmatrix', '~> 0.4'
75
69
  s.add_runtime_dependency 'rserve-client', '~> 0.3'
76
- s.add_runtime_dependency 'rubyvis', '~> 0.5.0'
70
+ s.add_runtime_dependency 'rubyvis', '~> 0.6.1'
77
71
  s.add_runtime_dependency 'distribution', '~> 0.7'
78
72
  s.add_runtime_dependency 'awesome_print', '~> 1.6'
79
73
 
80
- s.add_development_dependency 'bundler', '~> 1.7'
74
+ s.add_development_dependency 'bundler', '~> 1.10'
81
75
  s.add_development_dependency 'rake', '~> 10.4'
82
76
  s.add_development_dependency 'rdoc', '~> 4.2'
83
77
  s.add_development_dependency 'shoulda', '~> 3.5'
84
78
  s.add_development_dependency 'shoulda-matchers', '~> 2.2'
85
- s.add_development_dependency 'minitest', '~> 5.5'
79
+ s.add_development_dependency 'minitest', '~> 5.7'
86
80
  s.add_development_dependency 'gettext', '~> 3.1'
87
81
  s.add_development_dependency 'mocha', '~> 1.1'
82
+ s.add_development_dependency 'nmatrix', '~> 0.1.0'
83
+ s.add_development_dependency 'gsl-nmatrix', '~> 1.17.0'
88
84
  end
@@ -33,7 +33,7 @@ module Minitest
33
33
  def assert_similar_vector(exp, obs, delta = 1e-10, msg = nil)
34
34
  msg ||= "Different vectors #{exp} - #{obs}"
35
35
  assert_equal(exp.size, obs.size)
36
- exp.data_with_nils.each_with_index {|v, i|
36
+ exp.to_a.each_with_index {|v, i|
37
37
  assert_in_delta(v, obs[i], delta)
38
38
  }
39
39
  end
@@ -39,7 +39,7 @@ class StatsampleAnalysisTestCase < Minitest::Test
39
39
  should 'to_text returns the same as a normal ReportBuilder object' do
40
40
  rb = ReportBuilder.new(name: :test)
41
41
  section = ReportBuilder::Section.new(name: 'first')
42
- a = [1, 2, 3].to_numeric
42
+ a = Daru::Vector.new([1, 2, 3])
43
43
  section.add('first')
44
44
  section.add(a)
45
45
  rb.add(section)
@@ -98,8 +98,8 @@ class StatsampleAnalysisTestCase < Minitest::Test
98
98
  end
99
99
  should 'attach() allows to call objects on objects which respond to fields' do
100
100
  an = Statsample::Analysis::Suite.new(:summary)
101
- ds = { 'x' => stub(mean: 10), 'y' => stub(mean: 12) }
102
- ds.expects(:fields).returns(%w(x y)).at_least_once
101
+ ds = { :x => stub(mean: 10), :y => stub(mean: 12) }
102
+ ds.expects(:vectors).returns([:x, :y]).at_least_once
103
103
  an.attach(ds)
104
104
  assert_equal(10, an.x.mean)
105
105
  assert_equal(12, an.y.mean)
@@ -109,10 +109,10 @@ class StatsampleAnalysisTestCase < Minitest::Test
109
109
  end
110
110
  should 'attached objects should be called LIFO' do
111
111
  an = Statsample::Analysis::Suite.new(:summary)
112
- ds1 = { 'x' => stub(mean: 100), 'y' => stub(mean: 120), 'z' => stub(mean: 13) }
113
- ds1.expects(:fields).returns(%w(x y z)).at_least_once
114
- ds2 = { 'x' => stub(mean: 10), 'y' => stub(mean: 12) }
115
- ds2.expects(:fields).returns(%w(x y)).at_least_once
112
+ ds1 = { :x => stub(mean: 100), :y => stub(mean: 120), :z => stub(mean: 13) }
113
+ ds1.expects(:vectors).returns([:x, :y, :z]).at_least_once
114
+ ds2 = { :x => stub(mean: 10), :y => stub(mean: 12) }
115
+ ds2.expects(:vectors).returns([:x, :y]).at_least_once
116
116
  an.attach(ds1)
117
117
  an.attach(ds2)
118
118
  assert_equal(10, an.x.mean)
@@ -122,10 +122,10 @@ class StatsampleAnalysisTestCase < Minitest::Test
122
122
 
123
123
  should 'detach() without arguments drop latest object' do
124
124
  an = Statsample::Analysis::Suite.new(:summary)
125
- ds1 = { 'x' => stub(mean: 100), 'y' => stub(mean: 120), 'z' => stub(mean: 13) }
126
- ds1.expects(:fields).returns(%w(x y z)).at_least_once
127
- ds2 = { 'x' => stub(mean: 10), 'y' => stub(mean: 12) }
128
- ds2.expects(:fields).returns(%w(x y)).at_least_once
125
+ ds1 = { :x => stub(mean: 100), :y => stub(mean: 120), :z => stub(mean: 13) }
126
+ ds1.expects(:vectors).returns([:x, :y, :z]).at_least_once
127
+ ds2 = { :x => stub(mean: 10), :y => stub(mean: 12) }
128
+ ds2.expects(:vectors).returns([:x, :y]).at_least_once
129
129
  an.attach(ds1)
130
130
  an.attach(ds2)
131
131
  assert_equal(10, an.x.mean)
@@ -134,12 +134,12 @@ class StatsampleAnalysisTestCase < Minitest::Test
134
134
  end
135
135
  should 'detach() with argument drop select object' do
136
136
  an = Statsample::Analysis::Suite.new(:summary)
137
- ds1 = { 'x' => 1 }
138
- ds1.expects(:fields).returns(%w(x)).at_least_once
139
- ds2 = { 'x' => 2, 'y' => 3 }
140
- ds2.expects(:fields).returns(%w(x y)).at_least_once
141
- ds3 = { 'y' => 4 }
142
- ds3.expects(:fields).returns(%w(y)).at_least_once
137
+ ds1 = { :x => 1 }
138
+ ds1.expects(:vectors).returns([:x]).at_least_once
139
+ ds2 = { :x => 2, :y => 3 }
140
+ ds2.expects(:vectors).returns([:x, :y]).at_least_once
141
+ ds3 = { :y => 4 }
142
+ ds3.expects(:vectors).returns([:y]).at_least_once
143
143
 
144
144
  an.attach(ds3)
145
145
  an.attach(ds2)
@@ -2,12 +2,12 @@ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
2
  class StatsampleAnovaContrastTestCase < Minitest::Test
3
3
  context(Statsample::Anova::Contrast) do
4
4
  setup do
5
- constant = [12, 13, 11, 12, 12].to_numeric
6
- frequent = [9, 10, 9, 13, 14].to_numeric
7
- infrequent = [15, 16, 17, 16, 16].to_numeric
8
- never = [17, 18, 12, 18, 20].to_numeric
9
- @vectors = [constant, frequent, infrequent, never]
10
- @c = Statsample::Anova::Contrast.new(vectors: @vectors)
5
+ constant = Daru::Vector.new([12, 13, 11, 12, 12])
6
+ frequent = Daru::Vector.new([9, 10, 9, 13, 14])
7
+ infrequent = Daru::Vector.new([15, 16, 17, 16, 16])
8
+ never = Daru::Vector.new([17, 18, 12, 18, 20])
9
+ @vectors = [constant, frequent, infrequent, never]
10
+ @c = Statsample::Anova::Contrast.new(vectors: @vectors)
11
11
  end
12
12
  should 'return correct value using c' do
13
13
  @c.c([1, -1.quo(3), -1.quo(3), -1.quo(3)])
@@ -4,14 +4,14 @@ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
4
4
  class StatsampleAnovaTwoWayWithVectorsTestCase < Minitest::Test
5
5
  context(Statsample::Anova::TwoWayWithVectors) do
6
6
  setup do
7
- @pa = [5, 4, 3, 4, 2, 18, 19, 14, 12, 15, 6, 7, 5, 8, 4, 6, 9, 5, 9, 3].to_numeric
8
- @pa.name = 'Passive Avoidance'
9
- @a = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1].to_vector
10
- @a.labels = { 0 => '0%', 1 => '35%' }
11
- @a.name = 'Diet'
12
- @b = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1].to_vector
13
- @b.labels = { 0 => 'Young', 1 => 'Older' }
14
- @b.name = 'Age'
7
+ @pa = Daru::Vector.new [5, 4, 3, 4, 2, 18, 19, 14, 12, 15, 6, 7, 5, 8, 4, 6, 9, 5, 9, 3]
8
+ @pa.rename 'Passive Avoidance'
9
+ @a = Daru::Vector.new [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
10
+ # @a.labels = { 0 => '0%', 1 => '35%' }
11
+ @a.rename 'Diet'
12
+ @b = Daru::Vector.new [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
13
+ # @b.labels = { 0 => 'Young', 1 => 'Older' }
14
+ @b.rename 'Age'
15
15
  @anova = Statsample::Anova::TwoWayWithVectors.new(a: @a, b: @b, dependent: @pa)
16
16
  end
17
17
  should 'Statsample::Anova respond to #twoway_with_vectors' do
@@ -3,9 +3,9 @@ class StatsampleAnovaOneWayWithVectorsTestCase < Minitest::Test
3
3
  context(Statsample::Anova::OneWayWithVectors) do
4
4
  context('when initializing') do
5
5
  setup do
6
- @v1 = 10.times.map { rand(100) }.to_numeric
7
- @v2 = 10.times.map { rand(100) }.to_numeric
8
- @v3 = 10.times.map { rand(100) }.to_numeric
6
+ @v1 = Daru::Vector.new(10.times.map { rand(100) })
7
+ @v2 = Daru::Vector.new(10.times.map { rand(100) })
8
+ @v3 = Daru::Vector.new(10.times.map { rand(100) })
9
9
  end
10
10
  should 'be the same using [] or args*' do
11
11
  a1 = Statsample::Anova::OneWayWithVectors.new(@v1, @v2, @v3)
@@ -28,9 +28,9 @@ class StatsampleAnovaOneWayWithVectorsTestCase < Minitest::Test
28
28
  end
29
29
  end
30
30
  setup do
31
- @v1 = [3, 3, 2, 3, 6].to_vector(:numeric)
32
- @v2 = [7, 6, 5, 6, 7].to_vector(:numeric)
33
- @v3 = [9, 8, 9, 7, 8].to_vector(:numeric)
31
+ @v1 = Daru::Vector.new([3, 3, 2, 3, 6])
32
+ @v2 = Daru::Vector.new([7, 6, 5, 6, 7])
33
+ @v3 = Daru::Vector.new([9, 8, 9, 7, 8])
34
34
  @name = 'Anova testing'
35
35
  @anova = Statsample::Anova::OneWayWithVectors.new(@v1, @v2, @v3, name: @name)
36
36
  end
@@ -66,10 +66,10 @@ class StatsampleAnovaOneWayWithVectorsTestCase < Minitest::Test
66
66
  assert_in_delta(@anova.sst, @anova.sswg + @anova.ssbg, 0.00001)
67
67
  end
68
68
  should 'df total equal to number of n-1' do
69
- assert_equal(@v1.n + @v2.n + @v3.n - 1, @anova.df_total)
69
+ assert_equal(@v1.size + @v2.size + @v3.size - 1, @anova.df_total)
70
70
  end
71
71
  should 'df wg equal to number of n-k' do
72
- assert_equal(@v1.n + @v2.n + @v3.n - 3, @anova.df_wg)
72
+ assert_equal(@v1.size + @v2.size + @v3.size - 3, @anova.df_wg)
73
73
  end
74
74
  should 'df bg equal to number of k-1' do
75
75
  assert_equal(2, @anova.df_bg)
@@ -5,7 +5,7 @@ class StatsampleAwesomePrintBug < Minitest::Test
5
5
  require 'awesome_print'
6
6
  end
7
7
  should 'should be flawless' do
8
- a = [1, 2, 3].to_numeric
8
+ a = Daru::Vector.new([1, 2, 3])
9
9
 
10
10
  assert(a != [1, 2, 3])
11
11
  assert_nothing_raised do
@@ -4,11 +4,11 @@ class StatsampleBartlettSphericityTestCase < Minitest::Test
4
4
  include Statsample::Test
5
5
  context Statsample::Test::BartlettSphericity do
6
6
  setup do
7
- @v1 = [1, 2, 3, 4, 7, 8, 9, 10, 14, 15, 20, 50, 60, 70].to_numeric
8
- @v2 = [5, 6, 11, 12, 13, 16, 17, 18, 19, 20, 30, 0, 0, 0].to_numeric
9
- @v3 = [10, 3, 20, 30, 40, 50, 80, 10, 20, 30, 40, 2, 3, 4].to_numeric
7
+ @v1 = Daru::Vector.new([1, 2, 3, 4, 7, 8, 9, 10, 14, 15, 20, 50, 60, 70])
8
+ @v2 = Daru::Vector.new([5, 6, 11, 12, 13, 16, 17, 18, 19, 20, 30, 0, 0, 0])
9
+ @v3 = Daru::Vector.new([10, 3, 20, 30, 40, 50, 80, 10, 20, 30, 40, 2, 3, 4])
10
10
  # KMO: 0.490
11
- ds = { 'v1' => @v1, 'v2' => @v2, 'v3' => @v3 }.to_dataset
11
+ ds = Daru::DataFrame.new({ :v1 => @v1, :v2 => @v2, :v3 => @v3 })
12
12
  cor = Statsample::Bivariate.correlation_matrix(ds)
13
13
  @bs = Statsample::Test::BartlettSphericity.new(cor, 14)
14
14
  end
@@ -1,38 +1,38 @@
1
1
  require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
2
  class StatsampleBivariateTestCase < Minitest::Test
3
3
  should 'method sum of squares should be correct' do
4
- v1 = [1, 2, 3, 4, 5, 6].to_vector(:numeric)
5
- v2 = [6, 2, 4, 10, 12, 8].to_vector(:numeric)
4
+ v1 = Daru::Vector.new([1, 2, 3, 4, 5, 6])
5
+ v2 = Daru::Vector.new([6, 2, 4, 10, 12, 8])
6
6
  assert_equal(23.0, Statsample::Bivariate.sum_of_squares(v1, v2))
7
7
  end
8
8
  should_with_gsl 'return same covariance with ruby and gls implementation' do
9
- v1 = 20.times.collect { |_a| rand }.to_numeric
10
- v2 = 20.times.collect { |_a| rand }.to_numeric
9
+ v1 = Daru::Vector.new(20.times.collect { |_a| rand })
10
+ v2 = Daru::Vector.new(20.times.collect { |_a| rand })
11
11
  assert_in_delta(Statsample::Bivariate.covariance(v1, v2), Statsample::Bivariate.covariance_slow(v1, v2), 0.001)
12
12
  end
13
13
 
14
14
  should_with_gsl 'return same correlation with ruby and gls implementation' do
15
- v1 = 20.times.collect { |_a| rand }.to_numeric
16
- v2 = 20.times.collect { |_a| rand }.to_numeric
15
+ v1 = Daru::Vector.new(20.times.collect { |_a| rand })
16
+ v2 = Daru::Vector.new(20.times.collect { |_a| rand })
17
17
 
18
- assert_in_delta(GSL::Stats.correlation(v1.gsl, v2.gsl), Statsample::Bivariate.pearson_slow(v1, v2), 1e-10)
18
+ assert_in_delta(GSL::Stats.correlation(v1.to_gsl, v2.to_gsl), Statsample::Bivariate.pearson_slow(v1, v2), 1e-10)
19
19
  end
20
20
  should 'return correct pearson correlation' do
21
- v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:numeric)
22
- v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:numeric)
21
+ v1 = Daru::Vector.new([6, 5, 4, 7, 8, 4, 3, 2])
22
+ v2 = Daru::Vector.new([2, 3, 7, 8, 6, 4, 3, 2])
23
23
  assert_in_delta(0.525, Statsample::Bivariate.pearson(v1, v2), 0.001)
24
24
  assert_in_delta(0.525, Statsample::Bivariate.pearson_slow(v1, v2), 0.001)
25
25
 
26
- v3 = [6, 2, 1000, 1000, 5, 4, 7, 8, 4, 3, 2, nil].to_vector(:numeric)
27
- v4 = [2, nil, nil, nil, 3, 7, 8, 6, 4, 3, 2, 500].to_vector(:numeric)
26
+ v3 = Daru::Vector.new([6, 2, 1000, 1000, 5, 4, 7, 8, 4, 3, 2, nil])
27
+ v4 = Daru::Vector.new([2, nil, nil, nil, 3, 7, 8, 6, 4, 3, 2, 500])
28
28
  assert_in_delta(0.525, Statsample::Bivariate.pearson(v3, v4), 0.001)
29
29
  # Test ruby method
30
30
  v3a, v4a = Statsample.only_valid v3, v4
31
31
  assert_in_delta(0.525, Statsample::Bivariate.pearson_slow(v3a, v4a), 0.001)
32
32
  end
33
33
  should 'return correct values for t_pearson and prop_pearson' do
34
- v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:numeric)
35
- v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:numeric)
34
+ v1 = Daru::Vector.new([6, 5, 4, 7, 8, 4, 3, 2])
35
+ v2 = Daru::Vector.new([2, 3, 7, 8, 6, 4, 3, 2])
36
36
  r = Statsample::Bivariate::Pearson.new(v1, v2)
37
37
  assert_in_delta(0.525, r.r, 0.001)
38
38
  assert_in_delta(Statsample::Bivariate.t_pearson(v1, v2), r.t, 0.001)
@@ -40,11 +40,11 @@ class StatsampleBivariateTestCase < Minitest::Test
40
40
  assert(r.summary.size > 0)
41
41
  end
42
42
  should 'return correct correlation_matrix with nils values' do
43
- v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:numeric)
44
- v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:numeric)
45
- v3 = [6, 2, 1000, 1000, 5, 4, 7, 8].to_vector(:numeric)
46
- v4 = [2, nil, nil, nil, 3, 7, 8, 6].to_vector(:numeric)
47
- ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4 }.to_dataset
43
+ v1 = Daru::Vector.new([6, 5, 4, 7, 8, 4, 3, 2])
44
+ v2 = Daru::Vector.new([2, 3, 7, 8, 6, 4, 3, 2])
45
+ v3 = Daru::Vector.new([6, 2, 1000, 1000, 5, 4, 7, 8])
46
+ v4 = Daru::Vector.new([2, nil, nil, nil, 3, 7, 8, 6])
47
+ ds = Daru::DataFrame.new({ :v1 => v1, :v2 => v2, :v3 => v3, :v4 => v4 })
48
48
  c = proc { |n1, n2| Statsample::Bivariate.pearson(n1, n2) }
49
49
  expected = Matrix[[c.call(v1, v1), c.call(v1, v2), c.call(v1, v3), c.call(v1, v4)], [c.call(v2, v1), c.call(v2, v2), c.call(v2, v3), c.call(v2, v4)], [c.call(v3, v1), c.call(v3, v2), c.call(v3, v3), c.call(v3, v4)],
50
50
  [c.call(v4, v1), c.call(v4, v2), c.call(v4, v3), c.call(v4, v4)]
@@ -61,13 +61,13 @@ class StatsampleBivariateTestCase < Minitest::Test
61
61
  end
62
62
  should_with_gsl 'return same values for optimized and pairwise covariance matrix' do
63
63
  cases = 100
64
- v1 = Statsample::Vector.new_numeric(cases) { rand }
65
- v2 = Statsample::Vector.new_numeric(cases) { rand }
66
- v3 = Statsample::Vector.new_numeric(cases) { rand }
67
- v4 = Statsample::Vector.new_numeric(cases) { rand }
68
- v5 = Statsample::Vector.new_numeric(cases) { rand }
64
+ v1 = Daru::Vector.new_with_size(cases) { rand }
65
+ v2 = Daru::Vector.new_with_size(cases) { rand }
66
+ v3 = Daru::Vector.new_with_size(cases) { rand }
67
+ v4 = Daru::Vector.new_with_size(cases) { rand }
68
+ v5 = Daru::Vector.new_with_size(cases) { rand }
69
69
 
70
- ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4, 'v5' => v5 }.to_dataset
70
+ ds = Daru::DataFrame.new({ :v1 => v1, :v2 => v2, :v3 => v3, :v4 => v4, :v5 => v5 })
71
71
 
72
72
  cor_opt = Statsample::Bivariate.covariance_matrix_optimized(ds)
73
73
 
@@ -76,13 +76,14 @@ class StatsampleBivariateTestCase < Minitest::Test
76
76
  end
77
77
  should_with_gsl 'return same values for optimized and pairwise correlation matrix' do
78
78
  cases = 100
79
- v1 = Statsample::Vector.new_numeric(cases) { rand }
80
- v2 = Statsample::Vector.new_numeric(cases) { rand }
81
- v3 = Statsample::Vector.new_numeric(cases) { rand }
82
- v4 = Statsample::Vector.new_numeric(cases) { rand }
83
- v5 = Statsample::Vector.new_numeric(cases) { rand }
79
+ v1 = Daru::Vector.new_with_size(cases) { rand }
80
+ v2 = Daru::Vector.new_with_size(cases) { rand }
81
+ v3 = Daru::Vector.new_with_size(cases) { rand }
82
+ v4 = Daru::Vector.new_with_size(cases) { rand }
83
+ v5 = Daru::Vector.new_with_size(cases) { rand }
84
84
 
85
- ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4, 'v5' => v5 }.to_dataset
85
+ ds = Daru::DataFrame.new({
86
+ :v1 => v1, :v2 => v2, :v3 => v3, :v4 => v4, :v5 => v5 })
86
87
 
87
88
  cor_opt = Statsample::Bivariate.correlation_matrix_optimized(ds)
88
89
 
@@ -90,11 +91,11 @@ class StatsampleBivariateTestCase < Minitest::Test
90
91
  assert_equal_matrix(cor_opt, cor_pw, 1e-15)
91
92
  end
92
93
  should 'return correct correlation_matrix without nils values' do
93
- v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:numeric)
94
- v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:numeric)
95
- v3 = [6, 2, 1000, 1000, 5, 4, 7, 8].to_vector(:numeric)
96
- v4 = [2, 4, 6, 7, 3, 7, 8, 6].to_vector(:numeric)
97
- ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4 }.to_dataset
94
+ v1 = Daru::Vector.new([6, 5, 4, 7, 8, 4, 3, 2])
95
+ v2 = Daru::Vector.new([2, 3, 7, 8, 6, 4, 3, 2])
96
+ v3 = Daru::Vector.new([6, 2, 1000, 1000, 5, 4, 7, 8])
97
+ v4 = Daru::Vector.new([2, 4, 6, 7, 3, 7, 8, 6])
98
+ ds = Daru::DataFrame.new({ :v1 => v1, :v2 => v2, :v3 => v3, :v4 => v4 })
98
99
  c = proc { |n1, n2| Statsample::Bivariate.pearson(n1, n2) }
99
100
  expected = Matrix[[c.call(v1, v1), c.call(v1, v2), c.call(v1, v3), c.call(v1, v4)], [c.call(v2, v1), c.call(v2, v2), c.call(v2, v3), c.call(v2, v4)], [c.call(v3, v1), c.call(v3, v2), c.call(v3, v3), c.call(v3, v4)],
100
101
  [c.call(v4, v1), c.call(v4, v2), c.call(v4, v3), c.call(v4, v4)]
@@ -129,25 +130,25 @@ class StatsampleBivariateTestCase < Minitest::Test
129
130
  end
130
131
 
131
132
  should "return correct value for Spearman's rho" do
132
- v1 = [86, 97, 99, 100, 101, 103, 106, 110, 112, 113].to_vector(:numeric)
133
- v2 = [0, 20, 28, 27, 50, 29, 7, 17, 6, 12].to_vector(:numeric)
133
+ v1 =Daru::Vector.new( [86, 97, 99, 100, 101, 103, 106, 110, 112, 113])
134
+ v2 =Daru::Vector.new( [0, 20, 28, 27, 50, 29, 7, 17, 6, 12])
134
135
  assert_in_delta(-0.175758, Statsample::Bivariate.spearman(v1, v2), 0.0001)
135
136
  end
136
137
  should 'return correct value for point_biserial correlation' do
137
- c = [1, 3, 5, 6, 7, 100, 200, 300, 400, 300].to_vector(:numeric)
138
- d = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0].to_vector(:numeric)
138
+ c = Daru::Vector.new([1, 3, 5, 6, 7, 100, 200, 300, 400, 300])
139
+ d = Daru::Vector.new([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
139
140
  assert_raises TypeError do
140
141
  Statsample::Bivariate.point_biserial(c, d)
141
142
  end
142
143
  assert_in_delta(Statsample::Bivariate.point_biserial(d, c), Statsample::Bivariate.pearson(d, c), 0.0001)
143
144
  end
144
145
  should 'return correct value for tau_a and tau_b' do
145
- v1 = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11].to_vector(:numeric)
146
- v2 = [1, 3, 4, 5, 7, 8, 2, 9, 10, 6, 11].to_vector(:numeric)
146
+ v1 = Daru::Vector.new([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
147
+ v2 = Daru::Vector.new([1, 3, 4, 5, 7, 8, 2, 9, 10, 6, 11])
147
148
  assert_in_delta(0.6727, Statsample::Bivariate.tau_a(v1, v2), 0.001)
148
149
  assert_in_delta(0.6727, Statsample::Bivariate.tau_b((Statsample::Crosstab.new(v1, v2).to_matrix)), 0.001)
149
- v1 = [12, 14, 14, 17, 19, 19, 19, 19, 19, 20, 21, 21, 21, 21, 21, 22, 23, 24, 24, 24, 26, 26, 27].to_vector(:numeric)
150
- v2 = [11, 4, 4, 2, 0, 0, 0, 0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0].to_vector(:numeric)
150
+ v1 = Daru::Vector.new([12, 14, 14, 17, 19, 19, 19, 19, 19, 20, 21, 21, 21, 21, 21, 22, 23, 24, 24, 24, 26, 26, 27])
151
+ v2 = Daru::Vector.new([11, 4, 4, 2, 0, 0, 0, 0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0])
151
152
  assert_in_delta(-0.376201540231705, Statsample::Bivariate.tau_b(Statsample::Crosstab.new(v1, v2).to_matrix), 0.001)
152
153
  end
153
154
  should 'return correct value for gamma correlation' do
@@ -156,4 +157,8 @@ class StatsampleBivariateTestCase < Minitest::Test
156
157
  m2 = Matrix[[15, 12, 6, 5], [12, 8, 10, 8], [4, 6, 9, 10]]
157
158
  assert_in_delta(0.349, Statsample::Bivariate.gamma(m2), 0.001)
158
159
  end
160
+
161
+ should 'return correct residuals' do
162
+ # TODO: test Statsample::Bivariate.residuals
163
+ end
159
164
  end
@@ -1,33 +1,33 @@
1
1
  require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
2
  class StatsampleCodificationTestCase < Minitest::Test
3
3
  def initialize(*args)
4
- v1 = %w(run walk,run walking running sleep sleeping,dreaming sleep,dream).to_vector
4
+ v1 = Daru::Vector.new(%w(run walk,run walking running sleep sleeping,dreaming sleep,dream))
5
5
  @dict = { 'run' => 'r', 'walk' => 'w', 'walking' => 'w', 'running' => 'r', 'sleep' => 's', 'sleeping' => 's', 'dream' => 'd', 'dreaming' => 'd' }
6
- @ds = { 'v1' => v1 }.to_dataset
6
+ @ds = Daru::DataFrame.new({ :v1 => v1 })
7
7
  super
8
8
  end
9
9
 
10
10
  def test_create_hash
11
11
  expected_keys_v1 = %w(run walk walking running sleep sleeping dream dreaming).sort
12
- hash = Statsample::Codification.create_hash(@ds, ['v1'])
13
- assert_equal(['v1'], hash.keys)
14
- assert_equal(expected_keys_v1, hash['v1'].keys.sort)
15
- assert_equal(expected_keys_v1, hash['v1'].values.sort)
12
+ hash = Statsample::Codification.create_hash(@ds, [:v1])
13
+ assert_equal([:v1], hash.keys)
14
+ assert_equal(expected_keys_v1, hash[:v1].keys.sort)
15
+ assert_equal(expected_keys_v1, hash[:v1].values.sort)
16
16
  end
17
17
 
18
18
  def test_create_excel
19
19
  filename = Dir.tmpdir + '/test_excel' + Time.now.to_s + '.xls'
20
20
  # filename = Tempfile.new("test_codification_"+Time.now().to_s)
21
21
  Statsample::Codification.create_excel(@ds, ['v1'], filename)
22
- field = (['v1'] * 8).to_vector
23
- keys = %w(dream dreaming run running sleep sleeping walk walking).to_vector
24
- ds = Statsample::Excel.read(filename)
25
- assert_equal(field, ds['field'])
26
- assert_equal(keys, ds['original'])
27
- assert_equal(keys, ds['recoded'])
22
+ field = Daru::Vector.new(['v1'] * 8, name: :field)
23
+ keys = Daru::Vector.new(%w(dream dreaming run running sleep sleeping walk walking))
24
+ ds = Daru::DataFrame.from_excel(filename)
25
+ assert_equal(field, ds[:field])
26
+ assert_equal(keys, ds[:original])
27
+ assert_equal(keys, ds[:recoded])
28
28
  hash = Statsample::Codification.excel_to_recoded_hash(filename)
29
- assert_equal(keys.data, hash['v1'].keys.sort)
30
- assert_equal(keys.data, hash['v1'].values.sort)
29
+ assert_equal(keys.to_a, hash[:v1].keys.sort)
30
+ assert_equal(keys.to_a, hash[:v1].values.sort)
31
31
  end
32
32
 
33
33
  def test_create_yaml
@@ -35,44 +35,44 @@ class StatsampleCodificationTestCase < Minitest::Test
35
35
  Statsample::Codification.create_yaml(@ds, [])
36
36
  end
37
37
  expected_keys_v1 = %w(run walk walking running sleep sleeping dream dreaming).sort
38
- yaml_hash = Statsample::Codification.create_yaml(@ds, ['v1'])
38
+ yaml_hash = Statsample::Codification.create_yaml(@ds, [:v1])
39
39
  h = YAML.load(yaml_hash)
40
- assert_equal(['v1'], h.keys)
41
- assert_equal(expected_keys_v1, h['v1'].keys.sort)
40
+ assert_equal([:v1], h.keys)
41
+ assert_equal(expected_keys_v1, h[:v1].keys.sort)
42
42
  tf = Tempfile.new('test_codification')
43
- yaml_hash = Statsample::Codification.create_yaml(@ds, ['v1'], tf, Statsample::SPLIT_TOKEN)
43
+ yaml_hash = Statsample::Codification.create_yaml(@ds, [:v1], tf, Statsample::SPLIT_TOKEN)
44
44
  tf.close
45
45
  tf.open
46
46
  h = YAML.load(tf)
47
- assert_equal(['v1'], h.keys)
48
- assert_equal(expected_keys_v1, h['v1'].keys.sort)
47
+ assert_equal([:v1], h.keys)
48
+ assert_equal(expected_keys_v1, h[:v1].keys.sort)
49
49
  tf.close(true)
50
50
  end
51
51
 
52
52
  def test_recodification
53
53
  expected = [['r'], %w(w r), ['w'], ['r'], ['s'], %w(s d), %w(s d)]
54
- assert_equal(expected, Statsample::Codification.recode_vector(@ds['v1'], @dict))
55
- v2 = ['run', 'walk,dreaming', nil, 'walk,dream,dreaming,walking'].to_vector
54
+ assert_equal(expected, Statsample::Codification.recode_vector(@ds[:v1], @dict))
55
+ v2 = Daru::Vector.new(['run', 'walk,dreaming', nil, 'walk,dream,dreaming,walking'])
56
56
  expected = [['r'], %w(w d), nil, %w(w d)]
57
57
  assert_equal(expected, Statsample::Codification.recode_vector(v2, @dict))
58
58
  end
59
59
 
60
60
  def test_recode_dataset_simple
61
- Statsample::Codification.recode_dataset_simple!(@ds, 'v1' => @dict)
62
- expected_vector = ['r', 'w,r', 'w', 'r', 's', 's,d', 's,d'].to_vector
63
- assert_not_equal(expected_vector, @ds['v1'])
64
- assert_equal(expected_vector, @ds['v1_recoded'])
61
+ Statsample::Codification.recode_dataset_simple!(@ds, :v1 => @dict)
62
+ expected_vector = Daru::Vector.new(['r', 'w,r', 'w', 'r', 's', 's,d', 's,d'])
63
+ assert_not_equal(expected_vector, @ds[:v1])
64
+ assert_equal(expected_vector, @ds[:v1_recoded])
65
65
  end
66
66
 
67
67
  def test_recode_dataset_split
68
- Statsample::Codification.recode_dataset_split!(@ds, 'v1' => @dict)
68
+ Statsample::Codification.recode_dataset_split!(@ds, :v1 => @dict)
69
69
  e = {}
70
- e['r'] = [1, 1, 0, 1, 0, 0, 0].to_vector
71
- e['w'] = [0, 1, 1, 0, 0, 0, 0].to_vector
72
- e['s'] = [0, 0, 0, 0, 1, 1, 1].to_vector
73
- e['d'] = [0, 0, 0, 0, 0, 1, 1].to_vector
74
- e.each{|k, expected|
75
- assert_equal(expected, @ds['v1_' + k], "Error on key #{k}")
70
+ e['r'] = Daru::Vector.new([1, 1, 0, 1, 0, 0, 0])
71
+ e['w'] = Daru::Vector.new([0, 1, 1, 0, 0, 0, 0])
72
+ e['s'] = Daru::Vector.new([0, 0, 0, 0, 1, 1, 1])
73
+ e['d'] = Daru::Vector.new([0, 0, 0, 0, 0, 1, 1])
74
+ e.each { |k, expected|
75
+ assert_equal(expected, @ds[('v1_' + k).to_sym], "Error on key #{k}")
76
76
  }
77
77
  end
78
78
  end