statsample 1.5.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (114) hide show
  1. checksums.yaml +4 -4
  2. data/.build.sh +15 -0
  3. data/.gitignore +1 -0
  4. data/.travis.yml +19 -7
  5. data/CONTRIBUTING.md +33 -0
  6. data/History.txt +5 -0
  7. data/README.md +41 -53
  8. data/benchmarks/correlation_matrix_15_variables.rb +6 -5
  9. data/benchmarks/correlation_matrix_5_variables.rb +6 -5
  10. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +23 -26
  11. data/examples/boxplot.rb +17 -5
  12. data/examples/correlation_matrix.rb +36 -7
  13. data/examples/dataset.rb +25 -5
  14. data/examples/dominance_analysis.rb +8 -7
  15. data/examples/dominance_analysis_bootstrap.rb +16 -11
  16. data/examples/histogram.rb +16 -2
  17. data/examples/icc.rb +5 -6
  18. data/examples/levene.rb +17 -3
  19. data/examples/multiple_regression.rb +6 -3
  20. data/examples/parallel_analysis.rb +11 -6
  21. data/examples/polychoric.rb +26 -13
  22. data/examples/principal_axis.rb +8 -4
  23. data/examples/reliability.rb +10 -10
  24. data/examples/scatterplot.rb +8 -0
  25. data/examples/t_test.rb +7 -0
  26. data/examples/u_test.rb +10 -2
  27. data/examples/vector.rb +9 -6
  28. data/examples/velicer_map_test.rb +12 -8
  29. data/lib/statsample.rb +13 -47
  30. data/lib/statsample/analysis/suite.rb +1 -1
  31. data/lib/statsample/anova/oneway.rb +6 -6
  32. data/lib/statsample/anova/twoway.rb +26 -24
  33. data/lib/statsample/bivariate.rb +78 -61
  34. data/lib/statsample/bivariate/pearson.rb +2 -2
  35. data/lib/statsample/codification.rb +45 -32
  36. data/lib/statsample/converter/csv.rb +15 -53
  37. data/lib/statsample/converter/spss.rb +6 -5
  38. data/lib/statsample/converters.rb +50 -211
  39. data/lib/statsample/crosstab.rb +26 -25
  40. data/lib/statsample/daru.rb +117 -0
  41. data/lib/statsample/dataset.rb +70 -942
  42. data/lib/statsample/dominanceanalysis.rb +16 -17
  43. data/lib/statsample/dominanceanalysis/bootstrap.rb +26 -28
  44. data/lib/statsample/factor/parallelanalysis.rb +17 -19
  45. data/lib/statsample/factor/pca.rb +21 -20
  46. data/lib/statsample/factor/principalaxis.rb +3 -3
  47. data/lib/statsample/graph/boxplot.rb +8 -16
  48. data/lib/statsample/graph/histogram.rb +4 -4
  49. data/lib/statsample/graph/scatterplot.rb +8 -7
  50. data/lib/statsample/histogram.rb +128 -119
  51. data/lib/statsample/matrix.rb +20 -16
  52. data/lib/statsample/multiset.rb +39 -38
  53. data/lib/statsample/regression.rb +3 -3
  54. data/lib/statsample/regression/multiple.rb +8 -10
  55. data/lib/statsample/regression/multiple/alglibengine.rb +96 -89
  56. data/lib/statsample/regression/multiple/baseengine.rb +32 -32
  57. data/lib/statsample/regression/multiple/gslengine.rb +33 -36
  58. data/lib/statsample/regression/multiple/matrixengine.rb +7 -9
  59. data/lib/statsample/regression/multiple/rubyengine.rb +39 -41
  60. data/lib/statsample/reliability.rb +23 -25
  61. data/lib/statsample/reliability/icc.rb +8 -7
  62. data/lib/statsample/reliability/multiscaleanalysis.rb +14 -12
  63. data/lib/statsample/reliability/scaleanalysis.rb +58 -60
  64. data/lib/statsample/reliability/skillscaleanalysis.rb +34 -29
  65. data/lib/statsample/resample.rb +1 -1
  66. data/lib/statsample/shorthand.rb +29 -25
  67. data/lib/statsample/test/kolmogorovsmirnov.rb +5 -3
  68. data/lib/statsample/test/levene.rb +28 -27
  69. data/lib/statsample/test/t.rb +7 -9
  70. data/lib/statsample/test/umannwhitney.rb +28 -28
  71. data/lib/statsample/test/wilcoxonsignedrank.rb +45 -43
  72. data/lib/statsample/vector.rb +70 -1013
  73. data/lib/statsample/version.rb +1 -1
  74. data/statsample.gemspec +12 -16
  75. data/test/helpers_tests.rb +1 -1
  76. data/test/test_analysis.rb +17 -17
  77. data/test/test_anova_contrast.rb +6 -6
  78. data/test/test_anovatwowaywithdataset.rb +8 -8
  79. data/test/test_anovawithvectors.rb +8 -8
  80. data/test/test_awesome_print_bug.rb +1 -1
  81. data/test/test_bartlettsphericity.rb +4 -4
  82. data/test/test_bivariate.rb +48 -43
  83. data/test/test_codification.rb +33 -33
  84. data/test/test_crosstab.rb +9 -9
  85. data/test/test_dataset.rb +28 -458
  86. data/test/test_factor.rb +46 -38
  87. data/test/test_factor_pa.rb +22 -13
  88. data/test/test_ggobi.rb +4 -4
  89. data/test/test_gsl.rb +4 -4
  90. data/test/test_histogram.rb +3 -3
  91. data/test/test_matrix.rb +13 -13
  92. data/test/test_multiset.rb +103 -91
  93. data/test/test_regression.rb +57 -52
  94. data/test/test_reliability.rb +55 -45
  95. data/test/test_reliability_icc.rb +8 -8
  96. data/test/test_reliability_skillscale.rb +26 -24
  97. data/test/test_resample.rb +1 -1
  98. data/test/test_statistics.rb +3 -13
  99. data/test/test_stest.rb +9 -9
  100. data/test/test_stratified.rb +3 -3
  101. data/test/test_test_t.rb +12 -12
  102. data/test/test_umannwhitney.rb +2 -2
  103. data/test/test_vector.rb +76 -613
  104. data/test/test_wilcoxonsignedrank.rb +4 -4
  105. metadata +57 -28
  106. data/lib/statsample/rserve_extension.rb +0 -20
  107. data/lib/statsample/vector/gsl.rb +0 -106
  108. data/test/fixtures/repeated_fields.csv +0 -7
  109. data/test/fixtures/scientific_notation.csv +0 -4
  110. data/test/fixtures/test_csv.csv +0 -7
  111. data/test/fixtures/test_xls.xls +0 -0
  112. data/test/test_csv.rb +0 -63
  113. data/test/test_rserve_extension.rb +0 -42
  114. data/test/test_xls.rb +0 -52
@@ -7,26 +7,32 @@ class StatsampleFactorTestCase < Minitest::Test
7
7
  # Based on Hardle and Simar
8
8
  def setup
9
9
  @fixtures_dir = File.expand_path(File.dirname(__FILE__) + '/fixtures')
10
+ Daru.lazy_update = true
11
+ end
12
+
13
+ def teardown
14
+ Daru.lazy_update = false
10
15
  end
11
16
  # Based on Hurdle example
12
17
  def test_covariance_matrix
13
- ds = Statsample::PlainText.read(@fixtures_dir + '/bank2.dat', %w(v1 v2 v3 v4 v5 v6))
14
- ds.fields.each {|f|
15
- ds[f] = ds[f].centered
18
+ ds = Daru::DataFrame.from_plaintext(@fixtures_dir + '/bank2.dat', [:v1,:v2,:v3,:v4,:v5,:v6])
19
+ ds.vectors.each {|f|
20
+ ds[f] = ds[f].center
16
21
  }
17
- cm = ds.covariance_matrix
22
+ ds.update
23
+ cm = Statsample::Bivariate.covariance_matrix ds
18
24
  pca = Statsample::Factor::PCA.new(cm, m: 6)
19
25
  # puts pca.summary
20
26
  # puts pca.feature_matrix
21
- exp_eig = [2.985, 0.931, 0.242, 0.194, 0.085, 0.035].to_numeric
22
- assert_similar_vector(exp_eig, pca.eigenvalues.to_numeric, 0.1)
27
+ exp_eig = Daru::Vector.new([2.985, 0.931, 0.242, 0.194, 0.085, 0.035])
28
+ assert_similar_vector(exp_eig, Daru::Vector.new(pca.eigenvalues), 0.1)
23
29
  pcs = pca.principal_components(ds)
24
30
  k = 6
25
31
  comp_matrix = pca.component_matrix
26
32
  k.times {|i|
27
- pc_id = "PC_#{i + 1}"
33
+ pc_id = "PC_#{i + 1}".to_sym
28
34
  k.times {|j| # variable
29
- ds_id = "v#{j + 1}"
35
+ ds_id = "v#{j + 1}".to_sym
30
36
  r = Statsample::Bivariate.correlation(ds[ds_id], pcs[pc_id])
31
37
  assert_in_delta(r, comp_matrix[j, i])
32
38
  }
@@ -42,13 +48,13 @@ class StatsampleFactorTestCase < Minitest::Test
42
48
  samples = 20
43
49
  [3, 5, 7].each {|k|
44
50
  v = {}
45
- v['x0'] = samples.times.map { ran.call }.to_numeric.centered
46
- (1...k).each {|i|
47
- v["x#{i}"] = samples.times.map { |ii| ran.call * 0.5 + v["x#{i - 1}"][ii] * 0.5 }.to_numeric.centered
51
+ v[:x0] = Daru::Vector.new(samples.times.map { ran.call }).center
52
+ (1...k).each { |i|
53
+ v["x#{i}".to_sym] = Daru::Vector.new(samples.times.map { |ii| ran.call * 0.5 + v["x#{i - 1}".to_sym][ii] * 0.5 }).center
48
54
  }
49
55
 
50
- ds = v.to_dataset
51
- cm = ds.covariance_matrix
56
+ ds = Daru::DataFrame.new(v)
57
+ cm = Statsample::Bivariate.covariance_matrix ds
52
58
  # @r.assign('ds',ds)
53
59
  # @r.eval('cm<-cor(ds);sm<-eigen(cm, sym=TRUE);v<-sm$vectors')
54
60
  # puts "eigenvalues"
@@ -61,14 +67,14 @@ class StatsampleFactorTestCase < Minitest::Test
61
67
  cm_ruby = pca_ruby.component_matrix
62
68
  # puts cm_ruby.summary
63
69
  k.times {|i|
64
- pc_id = "PC_#{i + 1}"
70
+ pc_id = "PC_#{i + 1}".to_sym
65
71
  assert_in_delta(pca_ruby.eigenvalues[i], pca_gsl.eigenvalues[i], 1e-10)
66
72
  # Revert gsl component values
67
73
  pc_gsl_data = (pc_gsl[pc_id][0] - pc_ruby[pc_id][0]).abs > 1e-6 ? pc_gsl[pc_id].recode(&:-@) : pc_gsl[pc_id]
68
74
  assert_similar_vector(pc_gsl_data, pc_ruby[pc_id], 1e-6, "PC for #{k} variables")
69
75
  if false
70
76
  k.times {|j| # variable
71
- ds_id = "x#{j}"
77
+ ds_id = "x#{j}".to_sym
72
78
  r = Statsample::Bivariate.correlation(ds[ds_id], pc_ruby[pc_id])
73
79
  puts "#{pc_id}-#{ds_id}:#{r}"
74
80
  }
@@ -80,18 +86,22 @@ class StatsampleFactorTestCase < Minitest::Test
80
86
  end
81
87
 
82
88
  def test_principalcomponents
83
- principalcomponents(true) if Statsample.has_gsl?
89
+ if Statsample.has_gsl?
90
+ principalcomponents(true)
91
+ else
92
+ skip "Require GSL"
93
+ end
84
94
  principalcomponents(false)
85
95
  end
86
96
 
87
97
  def principalcomponents(gsl)
88
98
  ran = Distribution::Normal.rng
89
99
  samples = 50
90
- x1 = samples.times.map { ran.call }.to_numeric
91
- x2 = samples.times.map { |i| ran.call * 0.5 + x1[i] * 0.5 }.to_numeric
92
- ds = { 'x1' => x1, 'x2' => x2 }.to_dataset
100
+ x1 = Daru::Vector.new(samples.times.map { ran.call })
101
+ x2 = Daru::Vector.new(samples.times.map { |i| ran.call * 0.5 + x1[i] * 0.5 })
102
+ ds = Daru::DataFrame.new({ :x1 => x1, :x2 => x2 })
93
103
 
94
- cm = ds.correlation_matrix
104
+ cm = Statsample::Bivariate.correlation_matrix ds
95
105
  r = cm[0, 1]
96
106
  pca = Statsample::Factor::PCA.new(cm, m: 2, use_gsl: gsl)
97
107
  assert_in_delta(1 + r, pca.eigenvalues[0], 1e-10)
@@ -103,14 +113,14 @@ class StatsampleFactorTestCase < Minitest::Test
103
113
  assert_equal_vector(hs * m_1, pca.eigenvectors[1])
104
114
 
105
115
  pcs = pca.principal_components(ds)
106
- exp_pc_1 = ds.collect_with_index {|row, _i|
107
- hs * (row['x1'] + row['x2'])
116
+ exp_pc_1 = ds.collect_row_with_index {|row, _i|
117
+ hs * (row[:x1] + row[:x2])
108
118
  }
109
- exp_pc_2 = ds.collect_with_index {|row, _i|
110
- gsl ? hs * (row['x2'] - row['x1']) : hs * (row['x1'] - row['x2'])
119
+ exp_pc_2 = ds.collect_row_with_index {|row, _i|
120
+ gsl ? hs * (row[:x2] - row[:x1]) : hs * (row[:x1] - row[:x2])
111
121
  }
112
- assert_similar_vector(exp_pc_1, pcs['PC_1'])
113
- assert_similar_vector(exp_pc_2, pcs['PC_2'])
122
+ assert_similar_vector(exp_pc_1, pcs[:PC_1])
123
+ assert_similar_vector(exp_pc_2, pcs[:PC_2])
114
124
  end
115
125
 
116
126
  def test_antiimage
@@ -121,11 +131,11 @@ class StatsampleFactorTestCase < Minitest::Test
121
131
  end
122
132
 
123
133
  def test_kmo
124
- @v1 = [1, 2, 3, 4, 7, 8, 9, 10, 14, 15, 20, 50, 60, 70].to_numeric
125
- @v2 = [5, 6, 11, 12, 13, 16, 17, 18, 19, 20, 30, 0, 0, 0].to_numeric
126
- @v3 = [10, 3, 20, 30, 40, 50, 80, 10, 20, 30, 40, 2, 3, 4].to_numeric
134
+ @v1 = Daru::Vector.new([1, 2, 3, 4, 7, 8, 9, 10, 14, 15, 20, 50, 60, 70])
135
+ @v2 = Daru::Vector.new([5, 6, 11, 12, 13, 16, 17, 18, 19, 20, 30, 0, 0, 0])
136
+ @v3 = Daru::Vector.new([10, 3, 20, 30, 40, 50, 80, 10, 20, 30, 40, 2, 3, 4])
127
137
  # KMO: 0.490
128
- ds = { 'v1' => @v1, 'v2' => @v2, 'v3' => @v3 }.to_dataset
138
+ ds = Daru::DataFrame.new({ :v1 => @v1, :v2 => @v2, :v3 => @v3 })
129
139
  cor = Statsample::Bivariate.correlation_matrix(ds)
130
140
  kmo = Statsample::Factor.kmo(cor)
131
141
  assert_in_delta(0.667, kmo, 0.001)
@@ -141,12 +151,12 @@ class StatsampleFactorTestCase < Minitest::Test
141
151
  end
142
152
  # Tested with SPSS and R
143
153
  def test_pca
144
-
145
- a = [2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_numeric
146
- b = [2.4, 0.7, 2.9, 2.2, 3.0, 2.7, 1.6, 1.1, 1.6, 0.9].to_numeric
147
- a.recode! { |c| c - a.mean }
148
- b.recode! { |c| c - b.mean }
149
- ds = { 'a' => a, 'b' => b }.to_dataset
154
+ dtype = Statsample.has_gsl? ? :gsl : :array
155
+ a = Daru::Vector.new([2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1], dtype: dtype)
156
+ b = Daru::Vector.new([2.4, 0.7, 2.9, 2.2, 3.0, 2.7, 1.6, 1.1, 1.6, 0.9], dtype: dtype)
157
+ a = a - a.mean
158
+ b = b - b.mean
159
+ ds = Daru::DataFrame.new({ :a => a, :b => b })
150
160
 
151
161
  cov_matrix = Statsample::Bivariate.covariance_matrix(ds)
152
162
  if Statsample.has_gsl?
@@ -160,8 +170,6 @@ class StatsampleFactorTestCase < Minitest::Test
160
170
  end
161
171
 
162
172
  def pca_set(pca, _type)
163
-
164
-
165
173
  expected_eigenvalues = [1.284, 0.0490]
166
174
  expected_eigenvalues.each_with_index{|ev, i|
167
175
  assert_in_delta(ev, pca.eigenvalues[i], 0.001)
@@ -7,6 +7,11 @@ class StatsampleFactorTestCase < Minitest::Test
7
7
  # Based on Hardle and Simar
8
8
  def setup
9
9
  @fixtures_dir = File.expand_path(File.dirname(__FILE__) + '/fixtures')
10
+ Daru.lazy_update = true
11
+ end
12
+
13
+ def teardown
14
+ Daru.lazy_update = false
10
15
  end
11
16
 
12
17
  def test_parallelanalysis_with_data
@@ -15,26 +20,30 @@ class StatsampleFactorTestCase < Minitest::Test
15
20
  variables = 10
16
21
  iterations = 50
17
22
  rng = Distribution::Normal.rng
18
- f1 = samples.times.collect { rng.call }.to_numeric
19
- f2 = samples.times.collect { rng.call }.to_numeric
23
+ f1 = Daru::Vector.new(samples.times.collect { rng.call })
24
+ f2 = Daru::Vector.new(samples.times.collect { rng.call })
20
25
  vectors = {}
21
26
  variables.times do |i|
22
27
  if i < 5
23
- vectors["v#{i}"] = samples.times.collect {|nv|
24
- f1[nv] * 5 + f2[nv] * 2 + rng.call
25
- }.to_numeric
28
+ vectors["v#{i}".to_sym] = Daru::Vector.new(
29
+ samples.times.collect { |nv|
30
+ f1[nv] * 5 + f2[nv] * 2 + rng.call
31
+ }
32
+ )
26
33
  else
27
- vectors["v#{i}"] = samples.times.collect {|nv|
28
- f2[nv] * 5 + f1[nv] * 2 + rng.call
29
- }.to_numeric
34
+ vectors["v#{i}".to_sym] = Daru::Vector.new(
35
+ samples.times.collect { |nv|
36
+ f2[nv] * 5 + f1[nv] * 2 + rng.call
37
+ }
38
+ )
30
39
  end
31
40
  end
32
- ds = vectors.to_dataset
41
+ ds = Daru::DataFrame.new(vectors)
33
42
 
34
43
  pa1 = Statsample::Factor::ParallelAnalysis.new(ds, bootstrap_method: :data, iterations: iterations)
35
44
  pa2 = Statsample::Factor::ParallelAnalysis.with_random_data(samples, variables, iterations: iterations, percentil: 95)
36
45
  3.times do |n|
37
- var = "ev_0000#{n + 1}"
46
+ var = "ev_0000#{n + 1}".to_sym
38
47
  assert_in_delta(pa1.ds_eigenvalues[var].mean, pa2.ds_eigenvalues[var].mean, 0.05)
39
48
  end
40
49
  else
@@ -44,9 +53,9 @@ class StatsampleFactorTestCase < Minitest::Test
44
53
 
45
54
  def test_parallelanalysis
46
55
  pa = Statsample::Factor::ParallelAnalysis.with_random_data(305, 8, iterations: 100, percentil: 95)
47
- assert_in_delta(1.2454, pa.ds_eigenvalues['ev_00001'].mean, 0.01)
48
- assert_in_delta(1.1542, pa.ds_eigenvalues['ev_00002'].mean, 0.01)
49
- assert_in_delta(1.0836, pa.ds_eigenvalues['ev_00003'].mean, 0.01)
56
+ assert_in_delta(1.2454, pa.ds_eigenvalues[:ev_00001].mean, 0.01)
57
+ assert_in_delta(1.1542, pa.ds_eigenvalues[:ev_00002].mean, 0.01)
58
+ assert_in_delta(1.0836, pa.ds_eigenvalues[:ev_00003].mean, 0.01)
50
59
  assert(pa.summary.size > 0)
51
60
  end
52
61
  end
@@ -2,11 +2,11 @@ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
2
  require 'ostruct'
3
3
  class StatsampleGGobiTestCase < Minitest::Test
4
4
  def setup
5
- v1 = ([10.2, 20.3, 10, 20, 30, 40, 30, 20, 30, 40] * 10).to_vector(:numeric)
6
- @v2 = (%w(a b c a a a b b c d) * 10).to_vector(:object)
5
+ v1 = Daru::Vector.new([10.2, 20.3, 10, 20, 30, 40, 30, 20, 30, 40] * 10)
6
+ @v2 = Daru::Vector.new(%w(a b c a a a b b c d) * 10)
7
7
  @v2.labels = { 'a' => 'letter a', 'd' => 'letter d' }
8
- v3 = ([1, 2, 3, 4, 5, 4, 3, 2, 1, 2] * 10).to_vector(:numeric)
9
- @ds = { 'v1' => v1, 'v2' => @v2, 'v3' => v3 }.to_dataset
8
+ v3 = Daru::Vector.new([1, 2, 3, 4, 5, 4, 3, 2, 1, 2] * 10)
9
+ @ds = Daru::DataFrame.new({ :v1 => v1, :v2 => @v2, :v3 => v3 })
10
10
  end
11
11
 
12
12
  def test_values_definition
@@ -1,10 +1,10 @@
1
1
  require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
2
  class StatsampleGSLTestCase < Minitest::Test
3
3
  should_with_gsl 'matrix with gsl' do
4
- a = [1, 2, 3, 4, 20].to_vector(:numeric)
5
- b = [3, 2, 3, 4, 50].to_vector(:numeric)
6
- c = [6, 2, 3, 4, 3].to_vector(:numeric)
7
- ds = { 'a' => a, 'b' => b, 'c' => c }.to_dataset
4
+ a = Daru::Vector.new([1, 2, 3, 4, 20])
5
+ b = Daru::Vector.new([3, 2, 3, 4, 50])
6
+ c = Daru::Vector.new([6, 2, 3, 4, 3])
7
+ ds = Daru::DataFrame.new({ :a => a, :b => b, :c => c })
8
8
  gsl = ds.to_matrix.to_gsl
9
9
  assert_equal(5, gsl.size1)
10
10
  assert_equal(3, gsl.size2)
@@ -75,13 +75,13 @@ class StatsampleHistogramTestCase < Minitest::Test
75
75
  assert_equal(min, h.min_val)
76
76
  end
77
77
  should 'return correct estimated mean' do
78
- a = [1.5, 1.5, 1.5, 3.5, 3.5, 3.5].to_numeric
78
+ a = Daru::Vector.new([1.5, 1.5, 1.5, 3.5, 3.5, 3.5])
79
79
  h = Statsample::Histogram.alloc(5, [0, 5])
80
80
  h.increment(a)
81
81
  assert_equal(2.5, h.estimated_mean)
82
82
  end
83
83
  should 'return correct estimated standard deviation' do
84
- a = [0.5, 1.5, 1.5, 1.5, 2.5, 3.5, 3.5, 3.5, 4.5].to_numeric
84
+ a = Daru::Vector.new([0.5, 1.5, 1.5, 1.5, 2.5, 3.5, 3.5, 3.5, 4.5])
85
85
  h = Statsample::Histogram.alloc(5, [0, 5])
86
86
  h.increment(a)
87
87
  assert_equal(a.sd, h.estimated_standard_deviation)
@@ -100,7 +100,7 @@ class StatsampleHistogramTestCase < Minitest::Test
100
100
  end
101
101
  should 'not raise exception when all values equal' do
102
102
  assert_nothing_raised do
103
- a = [5, 5, 5, 5, 5, 5].to_numeric
103
+ a = Daru::Vector.new([5, 5, 5, 5, 5, 5])
104
104
  h = Statsample::Graph::Histogram.new(a)
105
105
  h.to_svg
106
106
  end
@@ -4,17 +4,17 @@ class StatsampleMatrixTestCase < Minitest::Test
4
4
  def test_to_dataset
5
5
  m = Matrix[[1, 4], [2, 5], [3, 6]]
6
6
  m.extend Statsample::NamedMatrix
7
- m.fields_y = %w(x1 x2)
7
+ m.fields_y = [:x1, :x2]
8
8
  m.name = 'test'
9
9
  samples = 100
10
- x1 = [1, 2, 3].to_numeric
11
- x2 = [4, 5, 6].to_numeric
12
- ds = { 'x1' => x1, 'x2' => x2 }.to_dataset
13
- ds.name = 'test'
14
- obs = m.to_dataset
15
- assert_equal(ds['x1'], obs['x1'])
16
- assert_equal(ds['x2'], obs['x2'])
17
- assert_equal(ds['x1'].mean, obs['x1'].mean)
10
+ x1 =Daru::Vector.new([1, 2, 3])
11
+ x2 =Daru::Vector.new([4, 5, 6])
12
+ ds = Daru::DataFrame.new({ :x1 => x1, :x2 => x2 })
13
+ ds.rename 'test'
14
+ obs = m.to_dataframe
15
+ assert_equal(ds[:x1], obs[:x1])
16
+ assert_equal(ds[:x2], obs[:x2])
17
+ assert_equal(ds[:x1].mean, obs[:x1].mean)
18
18
  end
19
19
 
20
20
  def test_covariate
@@ -33,10 +33,10 @@ class StatsampleMatrixTestCase < Minitest::Test
33
33
 
34
34
  assert_equal(:covariance, a._type)
35
35
 
36
- a = 50.times.collect { rand }.to_numeric
37
- b = 50.times.collect { rand }.to_numeric
38
- c = 50.times.collect { rand }.to_numeric
39
- ds = { 'a' => a, 'b' => b, 'c' => c }.to_dataset
36
+ a = Daru::Vector.new(50.times.collect { rand })
37
+ b = Daru::Vector.new(50.times.collect { rand })
38
+ c = Daru::Vector.new(50.times.collect { rand })
39
+ ds = Daru::DataFrame.new({ :a => a, :b => b, :c => c })
40
40
  corr = Statsample::Bivariate.correlation_matrix(ds)
41
41
  real = Statsample::Bivariate.covariance_matrix(ds).correlation
42
42
  corr.row_size.times do |i|
@@ -2,122 +2,134 @@ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
2
 
3
3
  class StatsampleMultisetTestCase < Minitest::Test
4
4
  def setup
5
- @x = %w(a a a a b b b b).to_vector
6
- @y = [1, 2, 3, 4, 5, 6, 7, 8].to_numeric
7
- @z = [10, 11, 12, 13, 14, 15, 16, 17].to_numeric
8
- @ds = { 'x' => @x, 'y' => @y, 'z' => @z }.to_dataset
9
- @ms = @ds.to_multiset_by_split('x')
5
+ @x = Daru::Vector.new(%w(a a a a b b b b))
6
+ @y = Daru::Vector.new([1, 2, 3, 4, 5, 6, 7, 8])
7
+ @z = Daru::Vector.new([10, 11, 12, 13, 14, 15, 16, 17])
8
+ @ds = Daru::DataFrame.new({ :x => @x, :y => @y, :z => @z })
9
+ @ms = @ds.to_multiset_by_split(:x)
10
10
  end
11
11
 
12
12
  def test_creation
13
- v1a = [1, 2, 3, 4, 5].to_vector
14
- v2b = [11, 21, 31, 41, 51].to_vector
15
- v3c = [21, 23, 34, 45, 56].to_vector
16
- ds1 = { 'v1' => v1a, 'v2' => v2b, 'v3' => v3c }.to_dataset
17
- v1b = [15, 25, 35, 45, 55].to_vector
18
- v2b = [11, 21, 31, 41, 51].to_vector
19
- v3b = [21, 23, 34, 45, 56].to_vector
20
- ds2 = { 'v1' => v1b, 'v2' => v2b, 'v3' => v3b }.to_dataset
21
- ms = Statsample::Multiset.new(%w(v1 v2 v3))
22
- ms.add_dataset('ds1', ds1)
23
- ms.add_dataset('ds2', ds2)
24
- assert_equal(ds1, ms['ds1'])
25
- assert_equal(ds2, ms['ds2'])
26
- assert_equal(v1a, ms['ds1']['v1'])
27
- assert_not_equal(v1b, ms['ds1']['v1'])
28
- ds3 = { 'v1' => v1b, 'v2' => v2b }.to_dataset
13
+ v1a = Daru::Vector.new([1, 2, 3, 4, 5])
14
+ v2b = Daru::Vector.new([11, 21, 31, 41, 51])
15
+ v3c = Daru::Vector.new([21, 23, 34, 45, 56])
16
+ ds1 = Daru::DataFrame.new({ :v1 => v1a, :v2 => v2b, :v3 => v3c })
17
+ v1b = Daru::Vector.new([15, 25, 35, 45, 55])
18
+ v2b = Daru::Vector.new([11, 21, 31, 41, 51])
19
+ v3b = Daru::Vector.new([21, 23, 34, 45, 56])
20
+ ds2 = Daru::DataFrame.new({ :v1 => v1b, :v2 => v2b, :v3 => v3b })
21
+ ms = Statsample::Multiset.new([:v1, :v2, :v3])
22
+ ms.add_dataset(:ds1, ds1)
23
+ ms.add_dataset(:ds2, ds2)
24
+ assert_equal(ds1, ms[:ds1])
25
+ assert_equal(ds2, ms[:ds2])
26
+ assert_equal(v1a, ms[:ds1][:v1])
27
+ assert_not_equal(v1b, ms[:ds1][:v1])
28
+ ds3 = Daru::DataFrame.new({ :v1 => v1b, :v2 => v2b })
29
29
  assert_raise ArgumentError do
30
30
  ms.add_dataset(ds3)
31
31
  end
32
32
  end
33
33
 
34
34
  def test_creation_empty
35
- ms = Statsample::Multiset.new_empty_vectors(%w(id age name), %w(male female))
36
- ds_male = { 'id' => [].to_vector, 'age' => [].to_vector, 'name' => [].to_vector }.to_dataset(%w(id age name))
37
- ds_female = { 'id' => [].to_vector, 'age' => [].to_vector, 'name' => [].to_vector }.to_dataset(%w(id age name))
38
- ms2 = Statsample::Multiset.new(%w(id age name))
39
- ms2.add_dataset('male', ds_male)
40
- ms2.add_dataset('female', ds_female)
35
+ ms = Statsample::Multiset.new_empty_vectors([:id, :age, :name], [:male, :female])
36
+ ds_male = Daru::DataFrame.new({
37
+ :id => Daru::Vector.new([]),
38
+ :age => Daru::Vector.new([]),
39
+ :name => Daru::Vector.new([])
40
+ }, order: [:id, :age, :name])
41
+
42
+ ds_female = Daru::DataFrame.new({
43
+ :id => Daru::Vector.new([]),
44
+ :age => Daru::Vector.new([]),
45
+ :name => Daru::Vector.new([])
46
+ }, order: [:id, :age, :name])
47
+
48
+ ms2 = Statsample::Multiset.new([:id, :age, :name])
49
+ ms2.add_dataset(:male, ds_male)
50
+ ms2.add_dataset(:female, ds_female)
41
51
  assert_equal(ms2.fields, ms.fields)
42
- assert_equal(ms2['male'], ms['male'])
43
- assert_equal(ms2['female'], ms['female'])
52
+ assert_equal(ms2[:male], ms[:male])
53
+ assert_equal(ms2[:female], ms[:female])
44
54
  end
45
55
 
46
56
  def test_to_multiset_by_split_one
47
- sex = %w(m m m m m f f f f m).to_vector(:object)
48
- city = %w(London Paris NY London Paris NY London Paris NY Tome).to_vector(:object)
49
- age = [10, 10, 20, 30, 34, 34, 33, 35, 36, 40].to_vector(:numeric)
50
- ds = { 'sex' => sex, 'city' => city, 'age' => age }.to_dataset
51
- ms = ds.to_multiset_by_split('sex')
57
+ sex = Daru::Vector.new(%w(m m m m m f f f f m))
58
+ city = Daru::Vector.new(%w(London Paris NY London Paris NY London Paris NY Tome))
59
+ age = Daru::Vector.new([10, 10, 20, 30, 34, 34, 33, 35, 36, 40])
60
+ ds = Daru::DataFrame.new({ :sex => sex, :city => city, :age => age })
61
+ ms = ds.to_multiset_by_split(:sex)
52
62
  assert_equal(2, ms.n_datasets)
53
63
  assert_equal(%w(f m), ms.datasets.keys.sort)
54
- assert_equal(6, ms['m'].cases)
55
- assert_equal(4, ms['f'].cases)
56
- assert_equal(%w(London Paris NY London Paris Tome), ms['m']['city'].to_a)
57
- assert_equal([34, 33, 35, 36], ms['f']['age'].to_a)
64
+ assert_equal(6, ms['m'].nrows)
65
+ assert_equal(4, ms['f'].nrows)
66
+ assert_equal(%w(London Paris NY London Paris Tome), ms['m'][:city].to_a)
67
+ assert_equal([34, 33, 35, 36], ms['f'][:age].to_a)
58
68
  end
59
69
 
60
70
  def test_to_multiset_by_split_multiple
61
- sex = %w(m m m m m m m m m m f f f f f f f f f f).to_vector(:object)
62
- city = %w(London London London Paris Paris London London London Paris Paris London London London Paris Paris London London London Paris Paris).to_vector(:object)
63
- hair = %w(blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black).to_vector(:object)
64
- age = [10, 10, 20, 30, 34, 34, 33, 35, 36, 40, 10, 10, 20, 30, 34, 34, 33, 35, 36, 40].to_vector(:numeric)
65
- ds = { 'sex' => sex, 'city' => city, 'hair' => hair, 'age' => age }.to_dataset(%w(sex city hair age))
66
- ms = ds.to_multiset_by_split('sex', 'city', 'hair')
71
+ sex = Daru::Vector.new(%w(m m m m m m m m m m f f f f f f f f f f))
72
+ city = Daru::Vector.new(%w(London London London Paris Paris London London London Paris Paris London London London Paris Paris London London London Paris Paris))
73
+ hair = Daru::Vector.new(%w(blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black))
74
+ age = Daru::Vector.new([10, 10, 20, 30, 34, 34, 33, 35, 36, 40, 10, 10, 20, 30, 34, 34, 33, 35, 36, 40])
75
+ ds = Daru::DataFrame.new({
76
+ :sex => sex, :city => city, :hair => hair, :age => age
77
+ }, order: [:sex, :city, :hair, :age])
78
+ ms = ds.to_multiset_by_split(:sex, :city, :hair)
67
79
  assert_equal(8, ms.n_datasets)
68
- assert_equal(3, ms[%w(m London blonde)].cases)
69
- assert_equal(3, ms[%w(m London blonde)].cases)
70
- assert_equal(1, ms[%w(m Paris black)].cases)
80
+ assert_equal(3, ms[%w(m London blonde)].nrows)
81
+ assert_equal(3, ms[%w(m London blonde)].nrows)
82
+ assert_equal(1, ms[%w(m Paris black)].nrows)
71
83
  end
72
84
 
73
85
  def test_stratum_proportion
74
- ds1 = { 'q1' => [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0].to_vector }.to_dataset
75
- ds2 = { 'q1' => [1, 1, 1, 1, 1, 1, 1, 0, 0].to_vector }.to_dataset
76
- assert_equal(5.0 / 12, ds1['q1'].proportion)
77
- assert_equal(7.0 / 9, ds2['q1'].proportion)
78
- ms = Statsample::Multiset.new(['q1'])
79
- ms.add_dataset('d1', ds1)
80
- ms.add_dataset('d2', ds2)
81
- ss = Statsample::StratifiedSample.new(ms, 'd1' => 50, 'd2' => 100)
82
- assert_in_delta(0.655, ss.proportion('q1'), 0.01)
83
- assert_in_delta(0.345, ss.proportion('q1', 0), 0.01)
86
+ ds1 = Daru::DataFrame.new({ :q1 => Daru::Vector.new([1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0]) })
87
+ ds2 = Daru::DataFrame.new({ :q1 => Daru::Vector.new([1, 1, 1, 1, 1, 1, 1, 0, 0]) })
88
+ assert_equal(5.0 / 12, ds1[:q1].proportion)
89
+ assert_equal(7.0 / 9, ds2[:q1].proportion)
90
+ ms = Statsample::Multiset.new([:q1])
91
+ ms.add_dataset(:d1, ds1)
92
+ ms.add_dataset(:d2, ds2)
93
+ ss = Statsample::StratifiedSample.new(ms, :d1 => 50, :d2 => 100)
94
+ assert_in_delta(0.655, ss.proportion(:q1), 0.01)
95
+ assert_in_delta(0.345, ss.proportion(:q1, 0), 0.01)
84
96
  end
85
97
 
86
98
  def test_stratum_scale
87
- boys = { 'test' => [50, 55, 60, 62, 62, 65, 67, 67, 70, 70, 73, 73, 75, 78, 78, 80, 85, 90].to_vector(:numeric) }.to_dataset
88
- girls = { 'test' => [70, 70, 72, 72, 75, 75, 78, 78, 80, 80, 82, 82, 85, 85, 88, 88, 90, 90].to_vector(:numeric) }.to_dataset
89
- ms = Statsample::Multiset.new(['test'])
90
- ms.add_dataset('boys', boys)
91
- ms.add_dataset('girls', girls)
92
- ss = Statsample::StratifiedSample.new(ms, 'boys' => 10_000, 'girls' => 10_000)
99
+ boys = Daru::DataFrame.new({ :test => Daru::Vector.new([50, 55, 60, 62, 62, 65, 67, 67, 70, 70, 73, 73, 75, 78, 78, 80, 85, 90]) })
100
+ girls =Daru::DataFrame.new({ :test => Daru::Vector.new( [70, 70, 72, 72, 75, 75, 78, 78, 80, 80, 82, 82, 85, 85, 88, 88, 90, 90]) })
101
+ ms = Statsample::Multiset.new([:test])
102
+ ms.add_dataset(:boys, boys)
103
+ ms.add_dataset(:girls, girls)
104
+ ss = Statsample::StratifiedSample.new(ms, :boys => 10_000, :girls => 10_000)
93
105
  assert_equal(2, ss.strata_number)
94
106
  assert_equal(20_000, ss.population_size)
95
- assert_equal(10_000, ss.stratum_size('boys'))
96
- assert_equal(10_000, ss.stratum_size('girls'))
107
+ assert_equal(10_000, ss.stratum_size(:boys))
108
+ assert_equal(10_000, ss.stratum_size(:girls))
97
109
  assert_equal(36, ss.sample_size)
98
- assert_equal(75, ss.mean('test'))
99
- assert_in_delta(1.45, ss.standard_error_wor('test'), 0.01)
100
- assert_in_delta(ss.standard_error_wor('test'), ss.standard_error_wor_2('test'), 0.00001)
110
+ assert_equal(75, ss.mean(:test))
111
+ assert_in_delta(1.45, ss.standard_error_wor(:test), 0.01)
112
+ assert_in_delta(ss.standard_error_wor(:test), ss.standard_error_wor_2(:test), 0.00001)
101
113
  end
102
114
 
103
115
  def test_each
104
116
  xpe = {
105
- 'a' => %w(a a a a).to_vector,
106
- 'b' => %w(b b b b).to_vector
117
+ 'a' => Daru::Vector.new(%w(a a a a)),
118
+ 'b' => Daru::Vector.new(%w(b b b b))
107
119
  }
108
120
  ype = {
109
- 'a' => [1, 2, 3, 4].to_numeric,
110
- 'b' => [5, 6, 7, 8].to_numeric
121
+ 'a' => Daru::Vector.new([1, 2, 3, 4]),
122
+ 'b' => Daru::Vector.new([5, 6, 7, 8])
111
123
  }
112
124
  zpe = {
113
- 'a' => [10, 11, 12, 13].to_numeric,
114
- 'b' => [14, 15, 16, 17].to_numeric
125
+ 'a' => Daru::Vector.new([10, 11, 12, 13]),
126
+ 'b' => Daru::Vector.new([14, 15, 16, 17])
115
127
  }
116
128
  xp, yp, zp = {}, {}, {}
117
129
  @ms.each {|k, ds|
118
- xp[k] = ds['x']
119
- yp[k] = ds['y']
120
- zp[k] = ds['z']
130
+ xp[k] = ds[:x]
131
+ yp[k] = ds[:y]
132
+ zp[k] = ds[:z]
121
133
  }
122
134
  assert_equal(xpe, xp)
123
135
  assert_equal(ype, yp)
@@ -127,38 +139,38 @@ class StatsampleMultisetTestCase < Minitest::Test
127
139
  def test_multiset_union_with_block
128
140
  r1 = rand
129
141
  r2 = rand
130
- ye = [1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2].to_numeric
142
+ ye = Daru::Vector.new([1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2])
131
143
 
132
- ze = [10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2].to_numeric
144
+ ze = Daru::Vector.new([10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2])
133
145
 
134
146
  ds2 = @ms.union {|k, ds|
135
- ds['y'].recode!{|v|
147
+ ds[:y].recode!{|v|
136
148
  k == 'a' ? v * r1 : v * r2
137
149
  }
138
- ds['z'].recode!{|v|
150
+ ds[:z].recode!{|v|
139
151
  k == 'a' ? v * r1 : v * r2
140
152
  }
141
153
  }
142
- assert_equal(ye, ds2['y'])
143
- assert_equal(ze, ds2['z'])
154
+ assert_equal(ye, ds2[:y])
155
+ assert_equal(ze, ds2[:z])
144
156
  end
145
157
 
146
158
  def test_multiset_union
147
159
  r1 = rand
148
160
  r2 = rand
149
- ye = [1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2].to_numeric
161
+ ye = Daru::Vector.new([1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2])
162
+ ze = Daru::Vector.new([10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2])
150
163
 
151
- ze = [10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2].to_numeric
152
- @ms.each {|k, ds|
153
- ds['y'].recode!{|v|
164
+ @ms.each do |k, ds|
165
+ ds[:y].recode! { |v|
154
166
  k == 'a' ? v * r1 : v * r2
155
167
  }
156
- ds['z'].recode!{|v|
168
+ ds[:z].recode! {|v|
157
169
  k == 'a' ? v * r1 : v * r2
158
170
  }
159
- }
171
+ end
160
172
  ds2 = @ms.union
161
- assert_equal(ye, ds2['y'])
162
- assert_equal(ze, ds2['z'])
173
+ assert_equal(ye, ds2[:y])
174
+ assert_equal(ze, ds2[:z])
163
175
  end
164
176
  end