statsample 1.5.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. checksums.yaml +4 -4
  2. data/.build.sh +15 -0
  3. data/.gitignore +1 -0
  4. data/.travis.yml +19 -7
  5. data/CONTRIBUTING.md +33 -0
  6. data/History.txt +5 -0
  7. data/README.md +41 -53
  8. data/benchmarks/correlation_matrix_15_variables.rb +6 -5
  9. data/benchmarks/correlation_matrix_5_variables.rb +6 -5
  10. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +23 -26
  11. data/examples/boxplot.rb +17 -5
  12. data/examples/correlation_matrix.rb +36 -7
  13. data/examples/dataset.rb +25 -5
  14. data/examples/dominance_analysis.rb +8 -7
  15. data/examples/dominance_analysis_bootstrap.rb +16 -11
  16. data/examples/histogram.rb +16 -2
  17. data/examples/icc.rb +5 -6
  18. data/examples/levene.rb +17 -3
  19. data/examples/multiple_regression.rb +6 -3
  20. data/examples/parallel_analysis.rb +11 -6
  21. data/examples/polychoric.rb +26 -13
  22. data/examples/principal_axis.rb +8 -4
  23. data/examples/reliability.rb +10 -10
  24. data/examples/scatterplot.rb +8 -0
  25. data/examples/t_test.rb +7 -0
  26. data/examples/u_test.rb +10 -2
  27. data/examples/vector.rb +9 -6
  28. data/examples/velicer_map_test.rb +12 -8
  29. data/lib/statsample.rb +13 -47
  30. data/lib/statsample/analysis/suite.rb +1 -1
  31. data/lib/statsample/anova/oneway.rb +6 -6
  32. data/lib/statsample/anova/twoway.rb +26 -24
  33. data/lib/statsample/bivariate.rb +78 -61
  34. data/lib/statsample/bivariate/pearson.rb +2 -2
  35. data/lib/statsample/codification.rb +45 -32
  36. data/lib/statsample/converter/csv.rb +15 -53
  37. data/lib/statsample/converter/spss.rb +6 -5
  38. data/lib/statsample/converters.rb +50 -211
  39. data/lib/statsample/crosstab.rb +26 -25
  40. data/lib/statsample/daru.rb +117 -0
  41. data/lib/statsample/dataset.rb +70 -942
  42. data/lib/statsample/dominanceanalysis.rb +16 -17
  43. data/lib/statsample/dominanceanalysis/bootstrap.rb +26 -28
  44. data/lib/statsample/factor/parallelanalysis.rb +17 -19
  45. data/lib/statsample/factor/pca.rb +21 -20
  46. data/lib/statsample/factor/principalaxis.rb +3 -3
  47. data/lib/statsample/graph/boxplot.rb +8 -16
  48. data/lib/statsample/graph/histogram.rb +4 -4
  49. data/lib/statsample/graph/scatterplot.rb +8 -7
  50. data/lib/statsample/histogram.rb +128 -119
  51. data/lib/statsample/matrix.rb +20 -16
  52. data/lib/statsample/multiset.rb +39 -38
  53. data/lib/statsample/regression.rb +3 -3
  54. data/lib/statsample/regression/multiple.rb +8 -10
  55. data/lib/statsample/regression/multiple/alglibengine.rb +96 -89
  56. data/lib/statsample/regression/multiple/baseengine.rb +32 -32
  57. data/lib/statsample/regression/multiple/gslengine.rb +33 -36
  58. data/lib/statsample/regression/multiple/matrixengine.rb +7 -9
  59. data/lib/statsample/regression/multiple/rubyengine.rb +39 -41
  60. data/lib/statsample/reliability.rb +23 -25
  61. data/lib/statsample/reliability/icc.rb +8 -7
  62. data/lib/statsample/reliability/multiscaleanalysis.rb +14 -12
  63. data/lib/statsample/reliability/scaleanalysis.rb +58 -60
  64. data/lib/statsample/reliability/skillscaleanalysis.rb +34 -29
  65. data/lib/statsample/resample.rb +1 -1
  66. data/lib/statsample/shorthand.rb +29 -25
  67. data/lib/statsample/test/kolmogorovsmirnov.rb +5 -3
  68. data/lib/statsample/test/levene.rb +28 -27
  69. data/lib/statsample/test/t.rb +7 -9
  70. data/lib/statsample/test/umannwhitney.rb +28 -28
  71. data/lib/statsample/test/wilcoxonsignedrank.rb +45 -43
  72. data/lib/statsample/vector.rb +70 -1013
  73. data/lib/statsample/version.rb +1 -1
  74. data/statsample.gemspec +12 -16
  75. data/test/helpers_tests.rb +1 -1
  76. data/test/test_analysis.rb +17 -17
  77. data/test/test_anova_contrast.rb +6 -6
  78. data/test/test_anovatwowaywithdataset.rb +8 -8
  79. data/test/test_anovawithvectors.rb +8 -8
  80. data/test/test_awesome_print_bug.rb +1 -1
  81. data/test/test_bartlettsphericity.rb +4 -4
  82. data/test/test_bivariate.rb +48 -43
  83. data/test/test_codification.rb +33 -33
  84. data/test/test_crosstab.rb +9 -9
  85. data/test/test_dataset.rb +28 -458
  86. data/test/test_factor.rb +46 -38
  87. data/test/test_factor_pa.rb +22 -13
  88. data/test/test_ggobi.rb +4 -4
  89. data/test/test_gsl.rb +4 -4
  90. data/test/test_histogram.rb +3 -3
  91. data/test/test_matrix.rb +13 -13
  92. data/test/test_multiset.rb +103 -91
  93. data/test/test_regression.rb +57 -52
  94. data/test/test_reliability.rb +55 -45
  95. data/test/test_reliability_icc.rb +8 -8
  96. data/test/test_reliability_skillscale.rb +26 -24
  97. data/test/test_resample.rb +1 -1
  98. data/test/test_statistics.rb +3 -13
  99. data/test/test_stest.rb +9 -9
  100. data/test/test_stratified.rb +3 -3
  101. data/test/test_test_t.rb +12 -12
  102. data/test/test_umannwhitney.rb +2 -2
  103. data/test/test_vector.rb +76 -613
  104. data/test/test_wilcoxonsignedrank.rb +4 -4
  105. metadata +57 -28
  106. data/lib/statsample/rserve_extension.rb +0 -20
  107. data/lib/statsample/vector/gsl.rb +0 -106
  108. data/test/fixtures/repeated_fields.csv +0 -7
  109. data/test/fixtures/scientific_notation.csv +0 -4
  110. data/test/fixtures/test_csv.csv +0 -7
  111. data/test/fixtures/test_xls.xls +0 -0
  112. data/test/test_csv.rb +0 -63
  113. data/test/test_rserve_extension.rb +0 -42
  114. data/test/test_xls.rb +0 -52
@@ -7,26 +7,32 @@ class StatsampleFactorTestCase < Minitest::Test
7
7
  # Based on Hardle and Simar
8
8
  def setup
9
9
  @fixtures_dir = File.expand_path(File.dirname(__FILE__) + '/fixtures')
10
+ Daru.lazy_update = true
11
+ end
12
+
13
+ def teardown
14
+ Daru.lazy_update = false
10
15
  end
11
16
  # Based on Hurdle example
12
17
  def test_covariance_matrix
13
- ds = Statsample::PlainText.read(@fixtures_dir + '/bank2.dat', %w(v1 v2 v3 v4 v5 v6))
14
- ds.fields.each {|f|
15
- ds[f] = ds[f].centered
18
+ ds = Daru::DataFrame.from_plaintext(@fixtures_dir + '/bank2.dat', [:v1,:v2,:v3,:v4,:v5,:v6])
19
+ ds.vectors.each {|f|
20
+ ds[f] = ds[f].center
16
21
  }
17
- cm = ds.covariance_matrix
22
+ ds.update
23
+ cm = Statsample::Bivariate.covariance_matrix ds
18
24
  pca = Statsample::Factor::PCA.new(cm, m: 6)
19
25
  # puts pca.summary
20
26
  # puts pca.feature_matrix
21
- exp_eig = [2.985, 0.931, 0.242, 0.194, 0.085, 0.035].to_numeric
22
- assert_similar_vector(exp_eig, pca.eigenvalues.to_numeric, 0.1)
27
+ exp_eig = Daru::Vector.new([2.985, 0.931, 0.242, 0.194, 0.085, 0.035])
28
+ assert_similar_vector(exp_eig, Daru::Vector.new(pca.eigenvalues), 0.1)
23
29
  pcs = pca.principal_components(ds)
24
30
  k = 6
25
31
  comp_matrix = pca.component_matrix
26
32
  k.times {|i|
27
- pc_id = "PC_#{i + 1}"
33
+ pc_id = "PC_#{i + 1}".to_sym
28
34
  k.times {|j| # variable
29
- ds_id = "v#{j + 1}"
35
+ ds_id = "v#{j + 1}".to_sym
30
36
  r = Statsample::Bivariate.correlation(ds[ds_id], pcs[pc_id])
31
37
  assert_in_delta(r, comp_matrix[j, i])
32
38
  }
@@ -42,13 +48,13 @@ class StatsampleFactorTestCase < Minitest::Test
42
48
  samples = 20
43
49
  [3, 5, 7].each {|k|
44
50
  v = {}
45
- v['x0'] = samples.times.map { ran.call }.to_numeric.centered
46
- (1...k).each {|i|
47
- v["x#{i}"] = samples.times.map { |ii| ran.call * 0.5 + v["x#{i - 1}"][ii] * 0.5 }.to_numeric.centered
51
+ v[:x0] = Daru::Vector.new(samples.times.map { ran.call }).center
52
+ (1...k).each { |i|
53
+ v["x#{i}".to_sym] = Daru::Vector.new(samples.times.map { |ii| ran.call * 0.5 + v["x#{i - 1}".to_sym][ii] * 0.5 }).center
48
54
  }
49
55
 
50
- ds = v.to_dataset
51
- cm = ds.covariance_matrix
56
+ ds = Daru::DataFrame.new(v)
57
+ cm = Statsample::Bivariate.covariance_matrix ds
52
58
  # @r.assign('ds',ds)
53
59
  # @r.eval('cm<-cor(ds);sm<-eigen(cm, sym=TRUE);v<-sm$vectors')
54
60
  # puts "eigenvalues"
@@ -61,14 +67,14 @@ class StatsampleFactorTestCase < Minitest::Test
61
67
  cm_ruby = pca_ruby.component_matrix
62
68
  # puts cm_ruby.summary
63
69
  k.times {|i|
64
- pc_id = "PC_#{i + 1}"
70
+ pc_id = "PC_#{i + 1}".to_sym
65
71
  assert_in_delta(pca_ruby.eigenvalues[i], pca_gsl.eigenvalues[i], 1e-10)
66
72
  # Revert gsl component values
67
73
  pc_gsl_data = (pc_gsl[pc_id][0] - pc_ruby[pc_id][0]).abs > 1e-6 ? pc_gsl[pc_id].recode(&:-@) : pc_gsl[pc_id]
68
74
  assert_similar_vector(pc_gsl_data, pc_ruby[pc_id], 1e-6, "PC for #{k} variables")
69
75
  if false
70
76
  k.times {|j| # variable
71
- ds_id = "x#{j}"
77
+ ds_id = "x#{j}".to_sym
72
78
  r = Statsample::Bivariate.correlation(ds[ds_id], pc_ruby[pc_id])
73
79
  puts "#{pc_id}-#{ds_id}:#{r}"
74
80
  }
@@ -80,18 +86,22 @@ class StatsampleFactorTestCase < Minitest::Test
80
86
  end
81
87
 
82
88
  def test_principalcomponents
83
- principalcomponents(true) if Statsample.has_gsl?
89
+ if Statsample.has_gsl?
90
+ principalcomponents(true)
91
+ else
92
+ skip "Require GSL"
93
+ end
84
94
  principalcomponents(false)
85
95
  end
86
96
 
87
97
  def principalcomponents(gsl)
88
98
  ran = Distribution::Normal.rng
89
99
  samples = 50
90
- x1 = samples.times.map { ran.call }.to_numeric
91
- x2 = samples.times.map { |i| ran.call * 0.5 + x1[i] * 0.5 }.to_numeric
92
- ds = { 'x1' => x1, 'x2' => x2 }.to_dataset
100
+ x1 = Daru::Vector.new(samples.times.map { ran.call })
101
+ x2 = Daru::Vector.new(samples.times.map { |i| ran.call * 0.5 + x1[i] * 0.5 })
102
+ ds = Daru::DataFrame.new({ :x1 => x1, :x2 => x2 })
93
103
 
94
- cm = ds.correlation_matrix
104
+ cm = Statsample::Bivariate.correlation_matrix ds
95
105
  r = cm[0, 1]
96
106
  pca = Statsample::Factor::PCA.new(cm, m: 2, use_gsl: gsl)
97
107
  assert_in_delta(1 + r, pca.eigenvalues[0], 1e-10)
@@ -103,14 +113,14 @@ class StatsampleFactorTestCase < Minitest::Test
103
113
  assert_equal_vector(hs * m_1, pca.eigenvectors[1])
104
114
 
105
115
  pcs = pca.principal_components(ds)
106
- exp_pc_1 = ds.collect_with_index {|row, _i|
107
- hs * (row['x1'] + row['x2'])
116
+ exp_pc_1 = ds.collect_row_with_index {|row, _i|
117
+ hs * (row[:x1] + row[:x2])
108
118
  }
109
- exp_pc_2 = ds.collect_with_index {|row, _i|
110
- gsl ? hs * (row['x2'] - row['x1']) : hs * (row['x1'] - row['x2'])
119
+ exp_pc_2 = ds.collect_row_with_index {|row, _i|
120
+ gsl ? hs * (row[:x2] - row[:x1]) : hs * (row[:x1] - row[:x2])
111
121
  }
112
- assert_similar_vector(exp_pc_1, pcs['PC_1'])
113
- assert_similar_vector(exp_pc_2, pcs['PC_2'])
122
+ assert_similar_vector(exp_pc_1, pcs[:PC_1])
123
+ assert_similar_vector(exp_pc_2, pcs[:PC_2])
114
124
  end
115
125
 
116
126
  def test_antiimage
@@ -121,11 +131,11 @@ class StatsampleFactorTestCase < Minitest::Test
121
131
  end
122
132
 
123
133
  def test_kmo
124
- @v1 = [1, 2, 3, 4, 7, 8, 9, 10, 14, 15, 20, 50, 60, 70].to_numeric
125
- @v2 = [5, 6, 11, 12, 13, 16, 17, 18, 19, 20, 30, 0, 0, 0].to_numeric
126
- @v3 = [10, 3, 20, 30, 40, 50, 80, 10, 20, 30, 40, 2, 3, 4].to_numeric
134
+ @v1 = Daru::Vector.new([1, 2, 3, 4, 7, 8, 9, 10, 14, 15, 20, 50, 60, 70])
135
+ @v2 = Daru::Vector.new([5, 6, 11, 12, 13, 16, 17, 18, 19, 20, 30, 0, 0, 0])
136
+ @v3 = Daru::Vector.new([10, 3, 20, 30, 40, 50, 80, 10, 20, 30, 40, 2, 3, 4])
127
137
  # KMO: 0.490
128
- ds = { 'v1' => @v1, 'v2' => @v2, 'v3' => @v3 }.to_dataset
138
+ ds = Daru::DataFrame.new({ :v1 => @v1, :v2 => @v2, :v3 => @v3 })
129
139
  cor = Statsample::Bivariate.correlation_matrix(ds)
130
140
  kmo = Statsample::Factor.kmo(cor)
131
141
  assert_in_delta(0.667, kmo, 0.001)
@@ -141,12 +151,12 @@ class StatsampleFactorTestCase < Minitest::Test
141
151
  end
142
152
  # Tested with SPSS and R
143
153
  def test_pca
144
-
145
- a = [2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_numeric
146
- b = [2.4, 0.7, 2.9, 2.2, 3.0, 2.7, 1.6, 1.1, 1.6, 0.9].to_numeric
147
- a.recode! { |c| c - a.mean }
148
- b.recode! { |c| c - b.mean }
149
- ds = { 'a' => a, 'b' => b }.to_dataset
154
+ dtype = Statsample.has_gsl? ? :gsl : :array
155
+ a = Daru::Vector.new([2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1], dtype: dtype)
156
+ b = Daru::Vector.new([2.4, 0.7, 2.9, 2.2, 3.0, 2.7, 1.6, 1.1, 1.6, 0.9], dtype: dtype)
157
+ a = a - a.mean
158
+ b = b - b.mean
159
+ ds = Daru::DataFrame.new({ :a => a, :b => b })
150
160
 
151
161
  cov_matrix = Statsample::Bivariate.covariance_matrix(ds)
152
162
  if Statsample.has_gsl?
@@ -160,8 +170,6 @@ class StatsampleFactorTestCase < Minitest::Test
160
170
  end
161
171
 
162
172
  def pca_set(pca, _type)
163
-
164
-
165
173
  expected_eigenvalues = [1.284, 0.0490]
166
174
  expected_eigenvalues.each_with_index{|ev, i|
167
175
  assert_in_delta(ev, pca.eigenvalues[i], 0.001)
@@ -7,6 +7,11 @@ class StatsampleFactorTestCase < Minitest::Test
7
7
  # Based on Hardle and Simar
8
8
  def setup
9
9
  @fixtures_dir = File.expand_path(File.dirname(__FILE__) + '/fixtures')
10
+ Daru.lazy_update = true
11
+ end
12
+
13
+ def teardown
14
+ Daru.lazy_update = false
10
15
  end
11
16
 
12
17
  def test_parallelanalysis_with_data
@@ -15,26 +20,30 @@ class StatsampleFactorTestCase < Minitest::Test
15
20
  variables = 10
16
21
  iterations = 50
17
22
  rng = Distribution::Normal.rng
18
- f1 = samples.times.collect { rng.call }.to_numeric
19
- f2 = samples.times.collect { rng.call }.to_numeric
23
+ f1 = Daru::Vector.new(samples.times.collect { rng.call })
24
+ f2 = Daru::Vector.new(samples.times.collect { rng.call })
20
25
  vectors = {}
21
26
  variables.times do |i|
22
27
  if i < 5
23
- vectors["v#{i}"] = samples.times.collect {|nv|
24
- f1[nv] * 5 + f2[nv] * 2 + rng.call
25
- }.to_numeric
28
+ vectors["v#{i}".to_sym] = Daru::Vector.new(
29
+ samples.times.collect { |nv|
30
+ f1[nv] * 5 + f2[nv] * 2 + rng.call
31
+ }
32
+ )
26
33
  else
27
- vectors["v#{i}"] = samples.times.collect {|nv|
28
- f2[nv] * 5 + f1[nv] * 2 + rng.call
29
- }.to_numeric
34
+ vectors["v#{i}".to_sym] = Daru::Vector.new(
35
+ samples.times.collect { |nv|
36
+ f2[nv] * 5 + f1[nv] * 2 + rng.call
37
+ }
38
+ )
30
39
  end
31
40
  end
32
- ds = vectors.to_dataset
41
+ ds = Daru::DataFrame.new(vectors)
33
42
 
34
43
  pa1 = Statsample::Factor::ParallelAnalysis.new(ds, bootstrap_method: :data, iterations: iterations)
35
44
  pa2 = Statsample::Factor::ParallelAnalysis.with_random_data(samples, variables, iterations: iterations, percentil: 95)
36
45
  3.times do |n|
37
- var = "ev_0000#{n + 1}"
46
+ var = "ev_0000#{n + 1}".to_sym
38
47
  assert_in_delta(pa1.ds_eigenvalues[var].mean, pa2.ds_eigenvalues[var].mean, 0.05)
39
48
  end
40
49
  else
@@ -44,9 +53,9 @@ class StatsampleFactorTestCase < Minitest::Test
44
53
 
45
54
  def test_parallelanalysis
46
55
  pa = Statsample::Factor::ParallelAnalysis.with_random_data(305, 8, iterations: 100, percentil: 95)
47
- assert_in_delta(1.2454, pa.ds_eigenvalues['ev_00001'].mean, 0.01)
48
- assert_in_delta(1.1542, pa.ds_eigenvalues['ev_00002'].mean, 0.01)
49
- assert_in_delta(1.0836, pa.ds_eigenvalues['ev_00003'].mean, 0.01)
56
+ assert_in_delta(1.2454, pa.ds_eigenvalues[:ev_00001].mean, 0.01)
57
+ assert_in_delta(1.1542, pa.ds_eigenvalues[:ev_00002].mean, 0.01)
58
+ assert_in_delta(1.0836, pa.ds_eigenvalues[:ev_00003].mean, 0.01)
50
59
  assert(pa.summary.size > 0)
51
60
  end
52
61
  end
@@ -2,11 +2,11 @@ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
2
  require 'ostruct'
3
3
  class StatsampleGGobiTestCase < Minitest::Test
4
4
  def setup
5
- v1 = ([10.2, 20.3, 10, 20, 30, 40, 30, 20, 30, 40] * 10).to_vector(:numeric)
6
- @v2 = (%w(a b c a a a b b c d) * 10).to_vector(:object)
5
+ v1 = Daru::Vector.new([10.2, 20.3, 10, 20, 30, 40, 30, 20, 30, 40] * 10)
6
+ @v2 = Daru::Vector.new(%w(a b c a a a b b c d) * 10)
7
7
  @v2.labels = { 'a' => 'letter a', 'd' => 'letter d' }
8
- v3 = ([1, 2, 3, 4, 5, 4, 3, 2, 1, 2] * 10).to_vector(:numeric)
9
- @ds = { 'v1' => v1, 'v2' => @v2, 'v3' => v3 }.to_dataset
8
+ v3 = Daru::Vector.new([1, 2, 3, 4, 5, 4, 3, 2, 1, 2] * 10)
9
+ @ds = Daru::DataFrame.new({ :v1 => v1, :v2 => @v2, :v3 => v3 })
10
10
  end
11
11
 
12
12
  def test_values_definition
@@ -1,10 +1,10 @@
1
1
  require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
2
  class StatsampleGSLTestCase < Minitest::Test
3
3
  should_with_gsl 'matrix with gsl' do
4
- a = [1, 2, 3, 4, 20].to_vector(:numeric)
5
- b = [3, 2, 3, 4, 50].to_vector(:numeric)
6
- c = [6, 2, 3, 4, 3].to_vector(:numeric)
7
- ds = { 'a' => a, 'b' => b, 'c' => c }.to_dataset
4
+ a = Daru::Vector.new([1, 2, 3, 4, 20])
5
+ b = Daru::Vector.new([3, 2, 3, 4, 50])
6
+ c = Daru::Vector.new([6, 2, 3, 4, 3])
7
+ ds = Daru::DataFrame.new({ :a => a, :b => b, :c => c })
8
8
  gsl = ds.to_matrix.to_gsl
9
9
  assert_equal(5, gsl.size1)
10
10
  assert_equal(3, gsl.size2)
@@ -75,13 +75,13 @@ class StatsampleHistogramTestCase < Minitest::Test
75
75
  assert_equal(min, h.min_val)
76
76
  end
77
77
  should 'return correct estimated mean' do
78
- a = [1.5, 1.5, 1.5, 3.5, 3.5, 3.5].to_numeric
78
+ a = Daru::Vector.new([1.5, 1.5, 1.5, 3.5, 3.5, 3.5])
79
79
  h = Statsample::Histogram.alloc(5, [0, 5])
80
80
  h.increment(a)
81
81
  assert_equal(2.5, h.estimated_mean)
82
82
  end
83
83
  should 'return correct estimated standard deviation' do
84
- a = [0.5, 1.5, 1.5, 1.5, 2.5, 3.5, 3.5, 3.5, 4.5].to_numeric
84
+ a = Daru::Vector.new([0.5, 1.5, 1.5, 1.5, 2.5, 3.5, 3.5, 3.5, 4.5])
85
85
  h = Statsample::Histogram.alloc(5, [0, 5])
86
86
  h.increment(a)
87
87
  assert_equal(a.sd, h.estimated_standard_deviation)
@@ -100,7 +100,7 @@ class StatsampleHistogramTestCase < Minitest::Test
100
100
  end
101
101
  should 'not raise exception when all values equal' do
102
102
  assert_nothing_raised do
103
- a = [5, 5, 5, 5, 5, 5].to_numeric
103
+ a = Daru::Vector.new([5, 5, 5, 5, 5, 5])
104
104
  h = Statsample::Graph::Histogram.new(a)
105
105
  h.to_svg
106
106
  end
@@ -4,17 +4,17 @@ class StatsampleMatrixTestCase < Minitest::Test
4
4
  def test_to_dataset
5
5
  m = Matrix[[1, 4], [2, 5], [3, 6]]
6
6
  m.extend Statsample::NamedMatrix
7
- m.fields_y = %w(x1 x2)
7
+ m.fields_y = [:x1, :x2]
8
8
  m.name = 'test'
9
9
  samples = 100
10
- x1 = [1, 2, 3].to_numeric
11
- x2 = [4, 5, 6].to_numeric
12
- ds = { 'x1' => x1, 'x2' => x2 }.to_dataset
13
- ds.name = 'test'
14
- obs = m.to_dataset
15
- assert_equal(ds['x1'], obs['x1'])
16
- assert_equal(ds['x2'], obs['x2'])
17
- assert_equal(ds['x1'].mean, obs['x1'].mean)
10
+ x1 =Daru::Vector.new([1, 2, 3])
11
+ x2 =Daru::Vector.new([4, 5, 6])
12
+ ds = Daru::DataFrame.new({ :x1 => x1, :x2 => x2 })
13
+ ds.rename 'test'
14
+ obs = m.to_dataframe
15
+ assert_equal(ds[:x1], obs[:x1])
16
+ assert_equal(ds[:x2], obs[:x2])
17
+ assert_equal(ds[:x1].mean, obs[:x1].mean)
18
18
  end
19
19
 
20
20
  def test_covariate
@@ -33,10 +33,10 @@ class StatsampleMatrixTestCase < Minitest::Test
33
33
 
34
34
  assert_equal(:covariance, a._type)
35
35
 
36
- a = 50.times.collect { rand }.to_numeric
37
- b = 50.times.collect { rand }.to_numeric
38
- c = 50.times.collect { rand }.to_numeric
39
- ds = { 'a' => a, 'b' => b, 'c' => c }.to_dataset
36
+ a = Daru::Vector.new(50.times.collect { rand })
37
+ b = Daru::Vector.new(50.times.collect { rand })
38
+ c = Daru::Vector.new(50.times.collect { rand })
39
+ ds = Daru::DataFrame.new({ :a => a, :b => b, :c => c })
40
40
  corr = Statsample::Bivariate.correlation_matrix(ds)
41
41
  real = Statsample::Bivariate.covariance_matrix(ds).correlation
42
42
  corr.row_size.times do |i|
@@ -2,122 +2,134 @@ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
2
 
3
3
  class StatsampleMultisetTestCase < Minitest::Test
4
4
  def setup
5
- @x = %w(a a a a b b b b).to_vector
6
- @y = [1, 2, 3, 4, 5, 6, 7, 8].to_numeric
7
- @z = [10, 11, 12, 13, 14, 15, 16, 17].to_numeric
8
- @ds = { 'x' => @x, 'y' => @y, 'z' => @z }.to_dataset
9
- @ms = @ds.to_multiset_by_split('x')
5
+ @x = Daru::Vector.new(%w(a a a a b b b b))
6
+ @y = Daru::Vector.new([1, 2, 3, 4, 5, 6, 7, 8])
7
+ @z = Daru::Vector.new([10, 11, 12, 13, 14, 15, 16, 17])
8
+ @ds = Daru::DataFrame.new({ :x => @x, :y => @y, :z => @z })
9
+ @ms = @ds.to_multiset_by_split(:x)
10
10
  end
11
11
 
12
12
  def test_creation
13
- v1a = [1, 2, 3, 4, 5].to_vector
14
- v2b = [11, 21, 31, 41, 51].to_vector
15
- v3c = [21, 23, 34, 45, 56].to_vector
16
- ds1 = { 'v1' => v1a, 'v2' => v2b, 'v3' => v3c }.to_dataset
17
- v1b = [15, 25, 35, 45, 55].to_vector
18
- v2b = [11, 21, 31, 41, 51].to_vector
19
- v3b = [21, 23, 34, 45, 56].to_vector
20
- ds2 = { 'v1' => v1b, 'v2' => v2b, 'v3' => v3b }.to_dataset
21
- ms = Statsample::Multiset.new(%w(v1 v2 v3))
22
- ms.add_dataset('ds1', ds1)
23
- ms.add_dataset('ds2', ds2)
24
- assert_equal(ds1, ms['ds1'])
25
- assert_equal(ds2, ms['ds2'])
26
- assert_equal(v1a, ms['ds1']['v1'])
27
- assert_not_equal(v1b, ms['ds1']['v1'])
28
- ds3 = { 'v1' => v1b, 'v2' => v2b }.to_dataset
13
+ v1a = Daru::Vector.new([1, 2, 3, 4, 5])
14
+ v2b = Daru::Vector.new([11, 21, 31, 41, 51])
15
+ v3c = Daru::Vector.new([21, 23, 34, 45, 56])
16
+ ds1 = Daru::DataFrame.new({ :v1 => v1a, :v2 => v2b, :v3 => v3c })
17
+ v1b = Daru::Vector.new([15, 25, 35, 45, 55])
18
+ v2b = Daru::Vector.new([11, 21, 31, 41, 51])
19
+ v3b = Daru::Vector.new([21, 23, 34, 45, 56])
20
+ ds2 = Daru::DataFrame.new({ :v1 => v1b, :v2 => v2b, :v3 => v3b })
21
+ ms = Statsample::Multiset.new([:v1, :v2, :v3])
22
+ ms.add_dataset(:ds1, ds1)
23
+ ms.add_dataset(:ds2, ds2)
24
+ assert_equal(ds1, ms[:ds1])
25
+ assert_equal(ds2, ms[:ds2])
26
+ assert_equal(v1a, ms[:ds1][:v1])
27
+ assert_not_equal(v1b, ms[:ds1][:v1])
28
+ ds3 = Daru::DataFrame.new({ :v1 => v1b, :v2 => v2b })
29
29
  assert_raise ArgumentError do
30
30
  ms.add_dataset(ds3)
31
31
  end
32
32
  end
33
33
 
34
34
  def test_creation_empty
35
- ms = Statsample::Multiset.new_empty_vectors(%w(id age name), %w(male female))
36
- ds_male = { 'id' => [].to_vector, 'age' => [].to_vector, 'name' => [].to_vector }.to_dataset(%w(id age name))
37
- ds_female = { 'id' => [].to_vector, 'age' => [].to_vector, 'name' => [].to_vector }.to_dataset(%w(id age name))
38
- ms2 = Statsample::Multiset.new(%w(id age name))
39
- ms2.add_dataset('male', ds_male)
40
- ms2.add_dataset('female', ds_female)
35
+ ms = Statsample::Multiset.new_empty_vectors([:id, :age, :name], [:male, :female])
36
+ ds_male = Daru::DataFrame.new({
37
+ :id => Daru::Vector.new([]),
38
+ :age => Daru::Vector.new([]),
39
+ :name => Daru::Vector.new([])
40
+ }, order: [:id, :age, :name])
41
+
42
+ ds_female = Daru::DataFrame.new({
43
+ :id => Daru::Vector.new([]),
44
+ :age => Daru::Vector.new([]),
45
+ :name => Daru::Vector.new([])
46
+ }, order: [:id, :age, :name])
47
+
48
+ ms2 = Statsample::Multiset.new([:id, :age, :name])
49
+ ms2.add_dataset(:male, ds_male)
50
+ ms2.add_dataset(:female, ds_female)
41
51
  assert_equal(ms2.fields, ms.fields)
42
- assert_equal(ms2['male'], ms['male'])
43
- assert_equal(ms2['female'], ms['female'])
52
+ assert_equal(ms2[:male], ms[:male])
53
+ assert_equal(ms2[:female], ms[:female])
44
54
  end
45
55
 
46
56
  def test_to_multiset_by_split_one
47
- sex = %w(m m m m m f f f f m).to_vector(:object)
48
- city = %w(London Paris NY London Paris NY London Paris NY Tome).to_vector(:object)
49
- age = [10, 10, 20, 30, 34, 34, 33, 35, 36, 40].to_vector(:numeric)
50
- ds = { 'sex' => sex, 'city' => city, 'age' => age }.to_dataset
51
- ms = ds.to_multiset_by_split('sex')
57
+ sex = Daru::Vector.new(%w(m m m m m f f f f m))
58
+ city = Daru::Vector.new(%w(London Paris NY London Paris NY London Paris NY Tome))
59
+ age = Daru::Vector.new([10, 10, 20, 30, 34, 34, 33, 35, 36, 40])
60
+ ds = Daru::DataFrame.new({ :sex => sex, :city => city, :age => age })
61
+ ms = ds.to_multiset_by_split(:sex)
52
62
  assert_equal(2, ms.n_datasets)
53
63
  assert_equal(%w(f m), ms.datasets.keys.sort)
54
- assert_equal(6, ms['m'].cases)
55
- assert_equal(4, ms['f'].cases)
56
- assert_equal(%w(London Paris NY London Paris Tome), ms['m']['city'].to_a)
57
- assert_equal([34, 33, 35, 36], ms['f']['age'].to_a)
64
+ assert_equal(6, ms['m'].nrows)
65
+ assert_equal(4, ms['f'].nrows)
66
+ assert_equal(%w(London Paris NY London Paris Tome), ms['m'][:city].to_a)
67
+ assert_equal([34, 33, 35, 36], ms['f'][:age].to_a)
58
68
  end
59
69
 
60
70
  def test_to_multiset_by_split_multiple
61
- sex = %w(m m m m m m m m m m f f f f f f f f f f).to_vector(:object)
62
- city = %w(London London London Paris Paris London London London Paris Paris London London London Paris Paris London London London Paris Paris).to_vector(:object)
63
- hair = %w(blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black).to_vector(:object)
64
- age = [10, 10, 20, 30, 34, 34, 33, 35, 36, 40, 10, 10, 20, 30, 34, 34, 33, 35, 36, 40].to_vector(:numeric)
65
- ds = { 'sex' => sex, 'city' => city, 'hair' => hair, 'age' => age }.to_dataset(%w(sex city hair age))
66
- ms = ds.to_multiset_by_split('sex', 'city', 'hair')
71
+ sex = Daru::Vector.new(%w(m m m m m m m m m m f f f f f f f f f f))
72
+ city = Daru::Vector.new(%w(London London London Paris Paris London London London Paris Paris London London London Paris Paris London London London Paris Paris))
73
+ hair = Daru::Vector.new(%w(blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black))
74
+ age = Daru::Vector.new([10, 10, 20, 30, 34, 34, 33, 35, 36, 40, 10, 10, 20, 30, 34, 34, 33, 35, 36, 40])
75
+ ds = Daru::DataFrame.new({
76
+ :sex => sex, :city => city, :hair => hair, :age => age
77
+ }, order: [:sex, :city, :hair, :age])
78
+ ms = ds.to_multiset_by_split(:sex, :city, :hair)
67
79
  assert_equal(8, ms.n_datasets)
68
- assert_equal(3, ms[%w(m London blonde)].cases)
69
- assert_equal(3, ms[%w(m London blonde)].cases)
70
- assert_equal(1, ms[%w(m Paris black)].cases)
80
+ assert_equal(3, ms[%w(m London blonde)].nrows)
81
+ assert_equal(3, ms[%w(m London blonde)].nrows)
82
+ assert_equal(1, ms[%w(m Paris black)].nrows)
71
83
  end
72
84
 
73
85
  def test_stratum_proportion
74
- ds1 = { 'q1' => [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0].to_vector }.to_dataset
75
- ds2 = { 'q1' => [1, 1, 1, 1, 1, 1, 1, 0, 0].to_vector }.to_dataset
76
- assert_equal(5.0 / 12, ds1['q1'].proportion)
77
- assert_equal(7.0 / 9, ds2['q1'].proportion)
78
- ms = Statsample::Multiset.new(['q1'])
79
- ms.add_dataset('d1', ds1)
80
- ms.add_dataset('d2', ds2)
81
- ss = Statsample::StratifiedSample.new(ms, 'd1' => 50, 'd2' => 100)
82
- assert_in_delta(0.655, ss.proportion('q1'), 0.01)
83
- assert_in_delta(0.345, ss.proportion('q1', 0), 0.01)
86
+ ds1 = Daru::DataFrame.new({ :q1 => Daru::Vector.new([1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0]) })
87
+ ds2 = Daru::DataFrame.new({ :q1 => Daru::Vector.new([1, 1, 1, 1, 1, 1, 1, 0, 0]) })
88
+ assert_equal(5.0 / 12, ds1[:q1].proportion)
89
+ assert_equal(7.0 / 9, ds2[:q1].proportion)
90
+ ms = Statsample::Multiset.new([:q1])
91
+ ms.add_dataset(:d1, ds1)
92
+ ms.add_dataset(:d2, ds2)
93
+ ss = Statsample::StratifiedSample.new(ms, :d1 => 50, :d2 => 100)
94
+ assert_in_delta(0.655, ss.proportion(:q1), 0.01)
95
+ assert_in_delta(0.345, ss.proportion(:q1, 0), 0.01)
84
96
  end
85
97
 
86
98
  def test_stratum_scale
87
- boys = { 'test' => [50, 55, 60, 62, 62, 65, 67, 67, 70, 70, 73, 73, 75, 78, 78, 80, 85, 90].to_vector(:numeric) }.to_dataset
88
- girls = { 'test' => [70, 70, 72, 72, 75, 75, 78, 78, 80, 80, 82, 82, 85, 85, 88, 88, 90, 90].to_vector(:numeric) }.to_dataset
89
- ms = Statsample::Multiset.new(['test'])
90
- ms.add_dataset('boys', boys)
91
- ms.add_dataset('girls', girls)
92
- ss = Statsample::StratifiedSample.new(ms, 'boys' => 10_000, 'girls' => 10_000)
99
+ boys = Daru::DataFrame.new({ :test => Daru::Vector.new([50, 55, 60, 62, 62, 65, 67, 67, 70, 70, 73, 73, 75, 78, 78, 80, 85, 90]) })
100
+ girls =Daru::DataFrame.new({ :test => Daru::Vector.new( [70, 70, 72, 72, 75, 75, 78, 78, 80, 80, 82, 82, 85, 85, 88, 88, 90, 90]) })
101
+ ms = Statsample::Multiset.new([:test])
102
+ ms.add_dataset(:boys, boys)
103
+ ms.add_dataset(:girls, girls)
104
+ ss = Statsample::StratifiedSample.new(ms, :boys => 10_000, :girls => 10_000)
93
105
  assert_equal(2, ss.strata_number)
94
106
  assert_equal(20_000, ss.population_size)
95
- assert_equal(10_000, ss.stratum_size('boys'))
96
- assert_equal(10_000, ss.stratum_size('girls'))
107
+ assert_equal(10_000, ss.stratum_size(:boys))
108
+ assert_equal(10_000, ss.stratum_size(:girls))
97
109
  assert_equal(36, ss.sample_size)
98
- assert_equal(75, ss.mean('test'))
99
- assert_in_delta(1.45, ss.standard_error_wor('test'), 0.01)
100
- assert_in_delta(ss.standard_error_wor('test'), ss.standard_error_wor_2('test'), 0.00001)
110
+ assert_equal(75, ss.mean(:test))
111
+ assert_in_delta(1.45, ss.standard_error_wor(:test), 0.01)
112
+ assert_in_delta(ss.standard_error_wor(:test), ss.standard_error_wor_2(:test), 0.00001)
101
113
  end
102
114
 
103
115
  def test_each
104
116
  xpe = {
105
- 'a' => %w(a a a a).to_vector,
106
- 'b' => %w(b b b b).to_vector
117
+ 'a' => Daru::Vector.new(%w(a a a a)),
118
+ 'b' => Daru::Vector.new(%w(b b b b))
107
119
  }
108
120
  ype = {
109
- 'a' => [1, 2, 3, 4].to_numeric,
110
- 'b' => [5, 6, 7, 8].to_numeric
121
+ 'a' => Daru::Vector.new([1, 2, 3, 4]),
122
+ 'b' => Daru::Vector.new([5, 6, 7, 8])
111
123
  }
112
124
  zpe = {
113
- 'a' => [10, 11, 12, 13].to_numeric,
114
- 'b' => [14, 15, 16, 17].to_numeric
125
+ 'a' => Daru::Vector.new([10, 11, 12, 13]),
126
+ 'b' => Daru::Vector.new([14, 15, 16, 17])
115
127
  }
116
128
  xp, yp, zp = {}, {}, {}
117
129
  @ms.each {|k, ds|
118
- xp[k] = ds['x']
119
- yp[k] = ds['y']
120
- zp[k] = ds['z']
130
+ xp[k] = ds[:x]
131
+ yp[k] = ds[:y]
132
+ zp[k] = ds[:z]
121
133
  }
122
134
  assert_equal(xpe, xp)
123
135
  assert_equal(ype, yp)
@@ -127,38 +139,38 @@ class StatsampleMultisetTestCase < Minitest::Test
127
139
  def test_multiset_union_with_block
128
140
  r1 = rand
129
141
  r2 = rand
130
- ye = [1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2].to_numeric
142
+ ye = Daru::Vector.new([1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2])
131
143
 
132
- ze = [10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2].to_numeric
144
+ ze = Daru::Vector.new([10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2])
133
145
 
134
146
  ds2 = @ms.union {|k, ds|
135
- ds['y'].recode!{|v|
147
+ ds[:y].recode!{|v|
136
148
  k == 'a' ? v * r1 : v * r2
137
149
  }
138
- ds['z'].recode!{|v|
150
+ ds[:z].recode!{|v|
139
151
  k == 'a' ? v * r1 : v * r2
140
152
  }
141
153
  }
142
- assert_equal(ye, ds2['y'])
143
- assert_equal(ze, ds2['z'])
154
+ assert_equal(ye, ds2[:y])
155
+ assert_equal(ze, ds2[:z])
144
156
  end
145
157
 
146
158
  def test_multiset_union
147
159
  r1 = rand
148
160
  r2 = rand
149
- ye = [1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2].to_numeric
161
+ ye = Daru::Vector.new([1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2])
162
+ ze = Daru::Vector.new([10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2])
150
163
 
151
- ze = [10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2].to_numeric
152
- @ms.each {|k, ds|
153
- ds['y'].recode!{|v|
164
+ @ms.each do |k, ds|
165
+ ds[:y].recode! { |v|
154
166
  k == 'a' ? v * r1 : v * r2
155
167
  }
156
- ds['z'].recode!{|v|
168
+ ds[:z].recode! {|v|
157
169
  k == 'a' ? v * r1 : v * r2
158
170
  }
159
- }
171
+ end
160
172
  ds2 = @ms.union
161
- assert_equal(ye, ds2['y'])
162
- assert_equal(ze, ds2['z'])
173
+ assert_equal(ye, ds2[:y])
174
+ assert_equal(ze, ds2[:z])
163
175
  end
164
176
  end