statsample 1.5.0 → 2.0.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (114)
  1. checksums.yaml +4 -4
  2. data/.build.sh +15 -0
  3. data/.gitignore +1 -0
  4. data/.travis.yml +19 -7
  5. data/CONTRIBUTING.md +33 -0
  6. data/History.txt +5 -0
  7. data/README.md +41 -53
  8. data/benchmarks/correlation_matrix_15_variables.rb +6 -5
  9. data/benchmarks/correlation_matrix_5_variables.rb +6 -5
  10. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +23 -26
  11. data/examples/boxplot.rb +17 -5
  12. data/examples/correlation_matrix.rb +36 -7
  13. data/examples/dataset.rb +25 -5
  14. data/examples/dominance_analysis.rb +8 -7
  15. data/examples/dominance_analysis_bootstrap.rb +16 -11
  16. data/examples/histogram.rb +16 -2
  17. data/examples/icc.rb +5 -6
  18. data/examples/levene.rb +17 -3
  19. data/examples/multiple_regression.rb +6 -3
  20. data/examples/parallel_analysis.rb +11 -6
  21. data/examples/polychoric.rb +26 -13
  22. data/examples/principal_axis.rb +8 -4
  23. data/examples/reliability.rb +10 -10
  24. data/examples/scatterplot.rb +8 -0
  25. data/examples/t_test.rb +7 -0
  26. data/examples/u_test.rb +10 -2
  27. data/examples/vector.rb +9 -6
  28. data/examples/velicer_map_test.rb +12 -8
  29. data/lib/statsample.rb +13 -47
  30. data/lib/statsample/analysis/suite.rb +1 -1
  31. data/lib/statsample/anova/oneway.rb +6 -6
  32. data/lib/statsample/anova/twoway.rb +26 -24
  33. data/lib/statsample/bivariate.rb +78 -61
  34. data/lib/statsample/bivariate/pearson.rb +2 -2
  35. data/lib/statsample/codification.rb +45 -32
  36. data/lib/statsample/converter/csv.rb +15 -53
  37. data/lib/statsample/converter/spss.rb +6 -5
  38. data/lib/statsample/converters.rb +50 -211
  39. data/lib/statsample/crosstab.rb +26 -25
  40. data/lib/statsample/daru.rb +117 -0
  41. data/lib/statsample/dataset.rb +70 -942
  42. data/lib/statsample/dominanceanalysis.rb +16 -17
  43. data/lib/statsample/dominanceanalysis/bootstrap.rb +26 -28
  44. data/lib/statsample/factor/parallelanalysis.rb +17 -19
  45. data/lib/statsample/factor/pca.rb +21 -20
  46. data/lib/statsample/factor/principalaxis.rb +3 -3
  47. data/lib/statsample/graph/boxplot.rb +8 -16
  48. data/lib/statsample/graph/histogram.rb +4 -4
  49. data/lib/statsample/graph/scatterplot.rb +8 -7
  50. data/lib/statsample/histogram.rb +128 -119
  51. data/lib/statsample/matrix.rb +20 -16
  52. data/lib/statsample/multiset.rb +39 -38
  53. data/lib/statsample/regression.rb +3 -3
  54. data/lib/statsample/regression/multiple.rb +8 -10
  55. data/lib/statsample/regression/multiple/alglibengine.rb +96 -89
  56. data/lib/statsample/regression/multiple/baseengine.rb +32 -32
  57. data/lib/statsample/regression/multiple/gslengine.rb +33 -36
  58. data/lib/statsample/regression/multiple/matrixengine.rb +7 -9
  59. data/lib/statsample/regression/multiple/rubyengine.rb +39 -41
  60. data/lib/statsample/reliability.rb +23 -25
  61. data/lib/statsample/reliability/icc.rb +8 -7
  62. data/lib/statsample/reliability/multiscaleanalysis.rb +14 -12
  63. data/lib/statsample/reliability/scaleanalysis.rb +58 -60
  64. data/lib/statsample/reliability/skillscaleanalysis.rb +34 -29
  65. data/lib/statsample/resample.rb +1 -1
  66. data/lib/statsample/shorthand.rb +29 -25
  67. data/lib/statsample/test/kolmogorovsmirnov.rb +5 -3
  68. data/lib/statsample/test/levene.rb +28 -27
  69. data/lib/statsample/test/t.rb +7 -9
  70. data/lib/statsample/test/umannwhitney.rb +28 -28
  71. data/lib/statsample/test/wilcoxonsignedrank.rb +45 -43
  72. data/lib/statsample/vector.rb +70 -1013
  73. data/lib/statsample/version.rb +1 -1
  74. data/statsample.gemspec +12 -16
  75. data/test/helpers_tests.rb +1 -1
  76. data/test/test_analysis.rb +17 -17
  77. data/test/test_anova_contrast.rb +6 -6
  78. data/test/test_anovatwowaywithdataset.rb +8 -8
  79. data/test/test_anovawithvectors.rb +8 -8
  80. data/test/test_awesome_print_bug.rb +1 -1
  81. data/test/test_bartlettsphericity.rb +4 -4
  82. data/test/test_bivariate.rb +48 -43
  83. data/test/test_codification.rb +33 -33
  84. data/test/test_crosstab.rb +9 -9
  85. data/test/test_dataset.rb +28 -458
  86. data/test/test_factor.rb +46 -38
  87. data/test/test_factor_pa.rb +22 -13
  88. data/test/test_ggobi.rb +4 -4
  89. data/test/test_gsl.rb +4 -4
  90. data/test/test_histogram.rb +3 -3
  91. data/test/test_matrix.rb +13 -13
  92. data/test/test_multiset.rb +103 -91
  93. data/test/test_regression.rb +57 -52
  94. data/test/test_reliability.rb +55 -45
  95. data/test/test_reliability_icc.rb +8 -8
  96. data/test/test_reliability_skillscale.rb +26 -24
  97. data/test/test_resample.rb +1 -1
  98. data/test/test_statistics.rb +3 -13
  99. data/test/test_stest.rb +9 -9
  100. data/test/test_stratified.rb +3 -3
  101. data/test/test_test_t.rb +12 -12
  102. data/test/test_umannwhitney.rb +2 -2
  103. data/test/test_vector.rb +76 -613
  104. data/test/test_wilcoxonsignedrank.rb +4 -4
  105. metadata +57 -28
  106. data/lib/statsample/rserve_extension.rb +0 -20
  107. data/lib/statsample/vector/gsl.rb +0 -106
  108. data/test/fixtures/repeated_fields.csv +0 -7
  109. data/test/fixtures/scientific_notation.csv +0 -4
  110. data/test/fixtures/test_csv.csv +0 -7
  111. data/test/fixtures/test_xls.xls +0 -0
  112. data/test/test_csv.rb +0 -63
  113. data/test/test_rserve_extension.rb +0 -42
  114. data/test/test_xls.rb +0 -52
@@ -3,21 +3,21 @@ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
3
3
  class StatsampleRegressionTestCase < Minitest::Test
4
4
  context 'Example with missing data' do
5
5
  setup do
6
- @x = [0.285714285714286, 0.114285714285714, 0.314285714285714, 0.2, 0.2, 0.228571428571429, 0.2, 0.4, 0.714285714285714, 0.285714285714286, 0.285714285714286, 0.228571428571429, 0.485714285714286, 0.457142857142857, 0.257142857142857, 0.228571428571429, 0.285714285714286, 0.285714285714286, 0.285714285714286, 0.142857142857143, 0.285714285714286, 0.514285714285714, 0.485714285714286, 0.228571428571429, 0.285714285714286, 0.342857142857143, 0.285714285714286, 0.0857142857142857].to_numeric
6
+ @x = Daru::Vector.new([0.285714285714286, 0.114285714285714, 0.314285714285714, 0.2, 0.2, 0.228571428571429, 0.2, 0.4, 0.714285714285714, 0.285714285714286, 0.285714285714286, 0.228571428571429, 0.485714285714286, 0.457142857142857, 0.257142857142857, 0.228571428571429, 0.285714285714286, 0.285714285714286, 0.285714285714286, 0.142857142857143, 0.285714285714286, 0.514285714285714, 0.485714285714286, 0.228571428571429, 0.285714285714286, 0.342857142857143, 0.285714285714286, 0.0857142857142857])
7
7
 
8
- @y = [nil, 0.233333333333333, nil, 0.266666666666667, 0.366666666666667, nil, 0.333333333333333, 0.3, 0.666666666666667, 0.0333333333333333, 0.333333333333333, nil, nil, 0.533333333333333, 0.433333333333333, 0.4, 0.4, 0.5, 0.4, 0.266666666666667, 0.166666666666667, 0.666666666666667, 0.433333333333333, 0.166666666666667, nil, 0.4, 0.366666666666667, nil].to_numeric
9
- @ds = { 'x' => @x, 'y' => @y }.to_dataset
10
- @lr = Statsample::Regression::Multiple::RubyEngine.new(@ds, 'y')
8
+ @y = Daru::Vector.new([nil, 0.233333333333333, nil, 0.266666666666667, 0.366666666666667, nil, 0.333333333333333, 0.3, 0.666666666666667, 0.0333333333333333, 0.333333333333333, nil, nil, 0.533333333333333, 0.433333333333333, 0.4, 0.4, 0.5, 0.4, 0.266666666666667, 0.166666666666667, 0.666666666666667, 0.433333333333333, 0.166666666666667, nil, 0.4, 0.366666666666667, nil])
9
+ @ds = Daru::DataFrame.new({ :x => @x, :y => @y })
10
+ @lr = Statsample::Regression::Multiple::RubyEngine.new(@ds, :y)
11
11
  end
12
12
  should 'have correct values' do
13
13
  assert_in_delta(0.455, @lr.r2, 0.001)
14
14
  assert_in_delta(0.427, @lr.r2_adjusted, 0.001)
15
15
  assert_in_delta(0.1165, @lr.se_estimate, 0.001)
16
16
  assert_in_delta(15.925, @lr.f, 0.0001)
17
- assert_in_delta(0.675, @lr.standarized_coeffs['x'], 0.001)
18
- assert_in_delta(0.778, @lr.coeffs['x'], 0.001, 'coeff x')
17
+ assert_in_delta(0.675, @lr.standarized_coeffs[:x], 0.001)
18
+ assert_in_delta(0.778, @lr.coeffs[:x], 0.001, 'coeff x')
19
19
  assert_in_delta(0.132, @lr.constant, 0.001, 'constant')
20
- assert_in_delta(0.195, @lr.coeffs_se['x'], 0.001, 'coeff x se')
20
+ assert_in_delta(0.195, @lr.coeffs_se[:x], 0.001, 'coeff x se')
21
21
  assert_in_delta(0.064, @lr.constant_se, 0.001, 'constant se')
22
22
  end
23
23
  end
@@ -26,24 +26,24 @@ class StatsampleRegressionTestCase < Minitest::Test
26
26
 
27
27
  a, b = rand, rand
28
28
 
29
- x1 = samples.times.map { rand }.to_numeric
30
- x2 = samples.times.map { rand }.to_numeric
31
- x3 = samples.times.map { |i| x1[i] * (1 + a) + x2[i] * (1 + b) }.to_numeric
32
- y = samples.times.map { |i| x1[i] + x2[i] + x3[i] + rand }.to_numeric
29
+ x1 = Daru::Vector.new(samples.times.map { rand })
30
+ x2 = Daru::Vector.new(samples.times.map { rand })
31
+ x3 = Daru::Vector.new(samples.times.map { |i| x1[i] * (1 + a) + x2[i] * (1 + b) })
32
+ y = Daru::Vector.new(samples.times.map { |i| x1[i] + x2[i] + x3[i] + rand })
33
33
 
34
- ds = { 'x1' => x1, 'x2' => x2, 'x3' => x3, 'y' => y }.to_dataset
34
+ ds = Daru::DataFrame.new({ :x1 => x1, :x2 => x2, :x3 => x3, :y => y })
35
35
 
36
36
  assert_raise(Statsample::Regression::LinearDependency) {
37
- Statsample::Regression::Multiple::RubyEngine.new(ds, 'y')
37
+ Statsample::Regression::Multiple::RubyEngine.new(ds, :y)
38
38
  }
39
39
  end
40
40
  def test_parameters
41
- @x = [13, 20, 10, 33, 15].to_vector(:numeric)
42
- @y = [23, 18, 35, 10, 27].to_vector(:numeric)
41
+ @x =Daru::Vector.new([13, 20, 10, 33, 15])
42
+ @y =Daru::Vector.new([23, 18, 35, 10, 27])
43
43
  reg = Statsample::Regression::Simple.new_from_vectors(@x, @y)
44
44
  _test_simple_regression(reg)
45
- ds = { 'x' => @x, 'y' => @y }.to_dataset
46
- reg = Statsample::Regression::Simple.new_from_dataset(ds, 'x', 'y')
45
+ ds = Daru::DataFrame.new({ :x => @x, :y => @y })
46
+ reg = Statsample::Regression::Simple.new_from_dataset(ds, :x, :y)
47
47
  _test_simple_regression(reg)
48
48
  reg = Statsample::Regression.simple(@x, @y)
49
49
  _test_simple_regression(reg)
@@ -57,11 +57,11 @@ class StatsampleRegressionTestCase < Minitest::Test
57
57
  end
58
58
 
59
59
  def test_summaries
60
- a = 10.times.map { rand(100) }.to_numeric
61
- b = 10.times.map { rand(100) }.to_numeric
62
- y = 10.times.map { rand(100) }.to_numeric
63
- ds = { 'a' => a, 'b' => b, 'y' => y }.to_dataset
64
- lr = Statsample::Regression::Multiple::RubyEngine.new(ds, 'y')
60
+ a = Daru::Vector.new(10.times.map { rand(100) })
61
+ b = Daru::Vector.new(10.times.map { rand(100) })
62
+ y = Daru::Vector.new(10.times.map { rand(100) })
63
+ ds = Daru::DataFrame.new({ :a => a, :b => b, :y => y })
64
+ lr = Statsample::Regression::Multiple::RubyEngine.new(ds, :y)
65
65
  assert(lr.summary.size > 0)
66
66
  end
67
67
 
@@ -87,12 +87,12 @@ class StatsampleRegressionTestCase < Minitest::Test
87
87
  end
88
88
 
89
89
  def test_multiple_regression_pairwise_2
90
- @a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 3, nil, 3, nil, 3].to_vector(:numeric)
91
- @b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4, 2, 2, nil, 6, 2].to_vector(:numeric)
92
- @c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100, nil, 3, 7, nil, 7].to_vector(:numeric)
93
- @y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 30, 40, nil, 50, nil].to_vector(:numeric)
94
- ds = { 'a' => @a, 'b' => @b, 'c' => @c, 'y' => @y }.to_dataset
95
- lr = Statsample::Regression::Multiple::RubyEngine.new(ds, 'y')
90
+ @a =Daru::Vector.new( [1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 3, nil, 3, nil, 3])
91
+ @b =Daru::Vector.new( [3, 3, 4, 4, 5, 5, 6, 6, 4, 4, 2, 2, nil, 6, 2])
92
+ @c =Daru::Vector.new( [11, 22, 30, 40, 50, 65, 78, 79, 99, 100, nil, 3, 7, nil, 7])
93
+ @y =Daru::Vector.new( [3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 30, 40, nil, 50, nil])
94
+ ds = Daru::DataFrame.new({ :a => @a, :b => @b, :c => @c, :y => @y })
95
+ lr = Statsample::Regression::Multiple::RubyEngine.new(ds, :y)
96
96
  assert_in_delta(2407.436, lr.sst, 0.001)
97
97
  assert_in_delta(0.752, lr.r, 0.001, 'pairwise r')
98
98
  assert_in_delta(0.565, lr.r2, 0.001)
@@ -103,12 +103,12 @@ class StatsampleRegressionTestCase < Minitest::Test
103
103
 
104
104
  def test_multiple_regression_gsl
105
105
  if Statsample.has_gsl?
106
- @a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7].to_vector(:numeric)
107
- @b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4].to_vector(:numeric)
108
- @c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100].to_vector(:numeric)
109
- @y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30].to_vector(:numeric)
110
- ds = { 'a' => @a, 'b' => @b, 'c' => @c, 'y' => @y }.to_dataset
111
- lr = Statsample::Regression::Multiple::GslEngine.new(ds, 'y')
106
+ @a =Daru::Vector.new( [1, 3, 2, 4, 3, 5, 4, 6, 5, 7])
107
+ @b =Daru::Vector.new( [3, 3, 4, 4, 5, 5, 6, 6, 4, 4])
108
+ @c =Daru::Vector.new( [11, 22, 30, 40, 50, 65, 78, 79, 99, 100])
109
+ @y =Daru::Vector.new( [3, 4, 5, 6, 7, 8, 9, 10, 20, 30])
110
+ ds = Daru::DataFrame.new({ :a => @a, :b => @b, :c => @c, :y => @y })
111
+ lr = Statsample::Regression::Multiple::GslEngine.new(ds, :y)
112
112
  assert(lr.summary.size > 0)
113
113
  model_test(lr, 'gsl')
114
114
  predicted = [1.7857, 6.0989, 3.2433, 7.2908, 4.9667, 10.3428, 8.8158, 10.4717, 23.6639, 25.3198]
@@ -127,8 +127,8 @@ class StatsampleRegressionTestCase < Minitest::Test
127
127
  end
128
128
 
129
129
  def model_test_matrix(lr, name = 'undefined')
130
- stan_coeffs = { 'a' => 0.151, 'b' => -0.547, 'c' => 0.997 }
131
- unstan_coeffs = { 'a' => 0.695, 'b' => -4.286, 'c' => 0.266 }
130
+ stan_coeffs = { :a => 0.151, :b => -0.547, :c => 0.997 }
131
+ unstan_coeffs = { :a => 0.695, :b => -4.286, :c => 0.266 }
132
132
 
133
133
  unstan_coeffs.each_key{|k|
134
134
  assert_in_delta(unstan_coeffs[k], lr.coeffs[k], 0.001, "b coeffs - #{name}")
@@ -145,15 +145,15 @@ class StatsampleRegressionTestCase < Minitest::Test
145
145
 
146
146
  assert_in_delta(20.908, lr.f, 0.001)
147
147
  assert_in_delta(0.001, lr.probability, 0.001)
148
- assert_in_delta(0.226, lr.tolerance('a'), 0.001)
148
+ assert_in_delta(0.226, lr.tolerance(:a), 0.001)
149
149
 
150
- coeffs_se = { 'a' => 1.171, 'b' => 1.129, 'c' => 0.072 }
150
+ coeffs_se = { :a => 1.171, :b => 1.129, :c => 0.072 }
151
151
 
152
152
  ccoeffs_se = lr.coeffs_se
153
153
  coeffs_se.each_key{|k|
154
154
  assert_in_delta(coeffs_se[k], ccoeffs_se[k], 0.001)
155
155
  }
156
- coeffs_t = { 'a' => 0.594, 'b' => -3.796, 'c' => 3.703 }
156
+ coeffs_t = { :a => 0.594, :b => -3.796, :c => 3.703 }
157
157
  ccoeffs_t = lr.coeffs_t
158
158
  coeffs_t.each_key{|k|
159
159
  assert_in_delta(coeffs_t[k], ccoeffs_t[k], 0.001)
@@ -174,32 +174,37 @@ class StatsampleRegressionTestCase < Minitest::Test
174
174
  end
175
175
 
176
176
  def test_regression_matrix
177
- @a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7].to_vector(:numeric)
178
- @b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4].to_vector(:numeric)
179
- @c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100].to_vector(:numeric)
180
- @y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30].to_vector(:numeric)
181
- ds = { 'a' => @a, 'b' => @b, 'c' => @c, 'y' => @y }.to_dataset
177
+ @a = Daru::Vector.new([1, 3, 2, 4, 3, 5, 4, 6, 5, 7])
178
+ @b = Daru::Vector.new([3, 3, 4, 4, 5, 5, 6, 6, 4, 4])
179
+ @c = Daru::Vector.new([11, 22, 30, 40, 50, 65, 78, 79, 99, 100])
180
+ @y = Daru::Vector.new([3, 4, 5, 6, 7, 8, 9, 10, 20, 30])
181
+ ds = Daru::DataFrame.new({ :a => @a, :b => @b, :c => @c, :y => @y })
182
182
  cor = Statsample::Bivariate.correlation_matrix(ds)
183
183
 
184
- lr = Statsample::Regression::Multiple::MatrixEngine.new(cor, 'y', y_mean: @y.mean, x_mean: { 'a' => ds['a'].mean, 'b' => ds['b'].mean, 'c' => ds['c'].mean }, cases: @a.size, y_sd: @y.sd, x_sd: { 'a' => @a.sd, 'b' => @b.sd, 'c' => @c.sd })
184
+ lr = Statsample::Regression::Multiple::MatrixEngine.new(
185
+ cor, :y, y_mean: @y.mean,
186
+ x_mean: { :a => ds[:a].mean, :b => ds[:b].mean, :c => ds[:c].mean },
187
+ cases: @a.size, y_sd: @y.sd, x_sd: { :a => @a.sd, :b => @b.sd, :c => @c.sd })
185
188
  assert_nil(lr.constant_se)
186
189
  assert_nil(lr.constant_t)
187
190
  model_test_matrix(lr, 'correlation matrix')
188
191
 
189
192
  covariance = Statsample::Bivariate.covariance_matrix(ds)
190
- lr = Statsample::Regression::Multiple::MatrixEngine.new(covariance, 'y', y_mean: @y.mean, x_mean: { 'a' => ds['a'].mean, 'b' => ds['b'].mean, 'c' => ds['c'].mean }, cases: @a.size)
193
+ lr = Statsample::Regression::Multiple::MatrixEngine.new(
194
+ covariance, :y, y_mean: @y.mean,
195
+ x_mean: { :a => ds[:a].mean, :b => ds[:b].mean, :c => ds[:c].mean }, cases: @a.size)
191
196
  assert(lr.summary.size > 0)
192
197
 
193
198
  model_test(lr, 'covariance matrix')
194
199
  end
195
200
 
196
201
  def test_regression_rubyengine
197
- @a = [nil, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7].to_vector(:numeric)
198
- @b = [nil, 3, 3, 4, 4, 5, 5, 6, 6, 4, 4].to_vector(:numeric)
199
- @c = [nil, 11, 22, 30, 40, 50, 65, 78, 79, 99, 100].to_vector(:numeric)
200
- @y = [nil, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30].to_vector(:numeric)
201
- ds = { 'a' => @a, 'b' => @b, 'c' => @c, 'y' => @y }.to_dataset
202
- lr = Statsample::Regression::Multiple::RubyEngine.new(ds, 'y')
202
+ @a = Daru::Vector.new([nil, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7])
203
+ @b = Daru::Vector.new([nil, 3, 3, 4, 4, 5, 5, 6, 6, 4, 4])
204
+ @c = Daru::Vector.new([nil, 11, 22, 30, 40, 50, 65, 78, 79, 99, 100])
205
+ @y = Daru::Vector.new([nil, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30])
206
+ ds = Daru::DataFrame.new({ :a => @a, :b => @b, :c => @c, :y => @y })
207
+ lr = Statsample::Regression::Multiple::RubyEngine.new(ds, :y)
203
208
  assert_equal(11, lr.total_cases)
204
209
  assert_equal(10, lr.valid_cases)
205
210
  model_test(lr, 'rubyengine with missing data')
@@ -1,6 +1,14 @@
1
1
  require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
2
  class StatsampleReliabilityTestCase < Minitest::Test
3
3
  context Statsample::Reliability do
4
+ setup do
5
+ Daru.lazy_update = true
6
+ end
7
+
8
+ teardown do
9
+ Daru.lazy_update = false
10
+ end
11
+
4
12
  should 'return correct r according to Spearman-Brown prophecy' do
5
13
  r = 0.6849
6
14
  n = 62.quo(15)
@@ -15,26 +23,27 @@ class StatsampleReliabilityTestCase < Minitest::Test
15
23
  setup do
16
24
  @samples = 40
17
25
  @n_variables = rand(10) + 2
18
- @ds = Statsample::Dataset.new
19
- base = @samples.times.collect { |_a| rand }.to_numeric
26
+ @ds = Daru::DataFrame.new({}, index: @samples)
27
+ base = Daru::Vector.new(@samples.times.collect { |_a| rand })
20
28
  @n_variables.times do |i|
21
- @ds[i] = base.collect { |v| v + rand }.to_numeric
29
+ @ds[i] = Daru::Vector.new(base.collect { |v| v + rand })
22
30
  end
23
31
 
24
- @ds.update_valid_data
25
- @k = @ds.fields.size
32
+ @ds.update
33
+ @k = @ds.ncols
26
34
  @cm = Statsample::Bivariate.covariance_matrix(@ds)
27
35
  @dse = @ds.dup
28
- @dse.fields.each do |f|
29
- @dse[f] = @dse[f].standarized
36
+ @dse.vectors.each do |f|
37
+ @dse[f] = @dse[f].standardize
30
38
  end
39
+ @dse.update
31
40
  @cme = Statsample::Bivariate.covariance_matrix(@dse)
32
41
  @a = Statsample::Reliability.cronbach_alpha(@ds)
33
42
  @as = Statsample::Reliability.cronbach_alpha_standarized(@ds)
34
43
  end
35
44
  should 'alpha will be equal to sum of matrix covariance less the individual variances' do
36
45
  total_sum = @cm.total_sum
37
- ind_var = @ds.fields.inject(0) { |ac, v| ac + @ds[v].variance }
46
+ ind_var = @ds.vectors.to_a.inject(0) { |ac, v| ac + @ds[v].variance }
38
47
  expected = @k.quo(@k - 1) * (1 - (ind_var.quo(total_sum)))
39
48
  assert_in_delta(expected, @a, 1e-10)
40
49
  end
@@ -57,7 +66,7 @@ class StatsampleReliabilityTestCase < Minitest::Test
57
66
 
58
67
  should 'standarized alpha will be equal to sum of matrix covariance less the individual variances on standarized values' do
59
68
  total_sum = @cme.total_sum
60
- ind_var = @dse.fields.inject(0) { |ac, v| ac + @dse[v].variance }
69
+ ind_var = @dse.vectors.to_a.inject(0) { |ac, v| ac + @dse[v].variance }
61
70
  expected = @k.quo(@k - 1) * (1 - (ind_var.quo(total_sum)))
62
71
  assert_in_delta(expected, @as, 1e-10)
63
72
  end
@@ -67,31 +76,31 @@ class StatsampleReliabilityTestCase < Minitest::Test
67
76
  @samples = 100
68
77
  @points = rand(10) + 3
69
78
  @max_point = (@points - 1) * 3
70
- @x1 = @samples.times.map { rand(@points) }.to_numeric
71
- @x2 = @samples.times.map { rand(@points) }.to_numeric
72
- @x3 = @samples.times.map { rand(@points) }.to_numeric
73
- @ds = { 'a' => @x1, 'b' => @x2, 'c' => @x3 }.to_dataset
79
+ @x1 = Daru::Vector.new(@samples.times.map { rand(@points) })
80
+ @x2 = Daru::Vector.new(@samples.times.map { rand(@points) })
81
+ @x3 = Daru::Vector.new(@samples.times.map { rand(@points) })
82
+ @ds = Daru::DataFrame.new({ :a => @x1, :b => @x2, :c => @x3 })
74
83
  @icc = Statsample::Reliability::ItemCharacteristicCurve.new(@ds)
75
84
  end
76
85
  should 'have a correct automatic vector_total' do
77
86
  assert_equal(@ds.vector_sum, @icc.vector_total)
78
87
  end
79
88
  should 'have a correct different vector_total' do
80
- x2 = @samples.times.map { rand(10) }.to_numeric
89
+ x2 = Daru::Vector.new(@samples.times.map { rand(10) })
81
90
  @icc = Statsample::Reliability::ItemCharacteristicCurve.new(@ds, x2)
82
91
  assert_equal(x2, @icc.vector_total)
83
92
  assert_raises(ArgumentError) do
84
- inc = (@samples + 10).times.map { rand(10) }.to_numeric
93
+ inc = Daru::Vector.new((@samples + 10).times.map { rand(10) })
85
94
  @icc = Statsample::Reliability::ItemCharacteristicCurve.new(@ds, inc)
86
95
  end
87
96
  end
88
97
  should 'have 0% for 0 points on maximum value values' do
89
- max = @icc.curve_field('a', 0)[@max_point.to_f]
98
+ max = @icc.curve_field(:a, 0)[@max_point.to_f]
90
99
  max ||= 0
91
100
  assert_in_delta(0, max)
92
101
  end
93
102
  should 'have 0 for max value on minimum value' do
94
- max = @icc.curve_field('a', @max_point)[0.0]
103
+ max = @icc.curve_field(:a, @max_point)[0.0]
95
104
  max ||= 0
96
105
  assert_in_delta(0, max)
97
106
  end
@@ -107,7 +116,7 @@ class StatsampleReliabilityTestCase < Minitest::Test
107
116
  expected = total.each {|k, v|
108
117
  total[k] = v.quo(total_g[k])
109
118
  }
110
- assert_equal(expected, @icc.curve_field('a', index))
119
+ assert_equal(expected, @icc.curve_field(:a, index))
111
120
  end
112
121
  end
113
122
 
@@ -119,33 +128,34 @@ class StatsampleReliabilityTestCase < Minitest::Test
119
128
  h = {}
120
129
  @scales.times {|s|
121
130
  @items_per_scale.times {|i|
122
- h["#{s}_#{i}"] = (size.times.map { (s * 2) + rand }).to_numeric
131
+ h["#{s}_#{i}".to_sym] = Daru::Vector.new((size.times.map { (s * 2) + rand }))
123
132
  }
124
133
  }
125
- @ds = h.to_dataset
134
+ @ds = Daru::DataFrame.new(h)
126
135
  @msa = Statsample::Reliability::MultiScaleAnalysis.new(name: 'Multiple Analysis') do |m|
127
136
  m.scale 'complete', @ds
128
137
  @scales.times {|s|
129
- m.scale "scale_#{s}", @ds.clone(@items_per_scale.times.map { |i| "#{s}_#{i}" }), name: "Scale #{s}"
138
+ m.scale "scale_#{s}", @ds.clone(*@items_per_scale.times.map { |i| "#{s}_#{i}".to_sym }), name: "Scale #{s}"
130
139
  }
131
140
  end
132
141
  end
142
+
133
143
  should 'Retrieve correct ScaleAnalysis for whole scale' do
134
144
  sa = Statsample::Reliability::ScaleAnalysis.new(@ds, name: 'Scale complete')
135
145
  assert_equal(sa.variances_mean, @msa.scale('complete').variances_mean)
136
146
  end
137
147
  should 'Retrieve correct ScaleAnalysis for each scale' do
138
148
  @scales.times {|s|
139
- sa = Statsample::Reliability::ScaleAnalysis.new(@ds.dup(@items_per_scale.times.map { |i| "#{s}_#{i}" }), name: "Scale #{s}")
149
+ sa = Statsample::Reliability::ScaleAnalysis.new(@ds.dup(@items_per_scale.times.map { |i| "#{s}_#{i}".to_sym }), name: "Scale #{s}")
140
150
  assert_equal(sa.variances_mean, @msa.scale("scale_#{s}").variances_mean)
141
151
  }
142
152
  end
143
153
  should 'retrieve correct correlation matrix for each scale' do
144
- vectors = { 'complete' => @ds.vector_sum }
154
+ vectors = { :complete => @ds.vector_sum }
145
155
  @scales.times {|s|
146
- vectors["scale_#{s}"] = @ds.dup(@items_per_scale.times.map { |i| "#{s}_#{i}" }).vector_sum
156
+ vectors["scale_#{s}".to_sym] = @ds.dup(@items_per_scale.times.map { |i| "#{s}_#{i}".to_sym }).vector_sum
147
157
  }
148
- ds2 = vectors.to_dataset
158
+ ds2 = Daru::DataFrame.new(vectors)
149
159
  assert_equal(Statsample::Bivariate.correlation_matrix(ds2), @msa.correlation_matrix)
150
160
  end
151
161
  should 'delete scale using delete_scale' do
@@ -156,9 +166,9 @@ class StatsampleReliabilityTestCase < Minitest::Test
156
166
  @msa.delete_scale('complete')
157
167
  vectors = {}
158
168
  @scales.times {|s|
159
- vectors["scale_#{s}"] = @ds.dup(@items_per_scale.times.map { |i| "#{s}_#{i}" }).vector_sum
169
+ vectors["scale_#{s}".to_sym] = @ds.dup(@items_per_scale.times.map { |i| "#{s}_#{i}".to_sym }).vector_sum
160
170
  }
161
- ds2 = vectors.to_dataset
171
+ ds2 = Daru::DataFrame.new(vectors)
162
172
  cor_matrix = Statsample::Bivariate.correlation_matrix(ds2)
163
173
  m = 3
164
174
  pca = Statsample::Factor::PCA.new(cor_matrix, m: m)
@@ -177,31 +187,31 @@ class StatsampleReliabilityTestCase < Minitest::Test
177
187
  end
178
188
  context Statsample::Reliability::ScaleAnalysis do
179
189
  setup do
180
- @x1 = [1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 30].to_numeric
181
- @x2 = [1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 50].to_numeric
182
- @x3 = [2, 2, 1, 1, 1, 2, 2, 2, 3, 4, 5, 40].to_numeric
183
- @x4 = [1, 2, 3, 4, 4, 4, 4, 3, 4, 4, 5, 30].to_numeric
184
- @ds = { 'x1' => @x1, 'x2' => @x2, 'x3' => @x3, 'x4' => @x4 }.to_dataset
190
+ @x1 = Daru::Vector.new([1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 30])
191
+ @x2 = Daru::Vector.new([1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 50])
192
+ @x3 = Daru::Vector.new([2, 2, 1, 1, 1, 2, 2, 2, 3, 4, 5, 40])
193
+ @x4 = Daru::Vector.new([1, 2, 3, 4, 4, 4, 4, 3, 4, 4, 5, 30])
194
+ @ds = Daru::DataFrame.new({ :x1 => @x1, :x2 => @x2, :x3 => @x3, :x4 => @x4 })
185
195
  @ia = Statsample::Reliability::ScaleAnalysis.new(@ds)
186
196
  @cov_matrix = @ia.cov_m
187
197
  end
188
198
  should 'return correct values for item analysis' do
189
199
  assert_in_delta(0.980, @ia.alpha, 0.001)
190
200
  assert_in_delta(0.999, @ia.alpha_standarized, 0.001)
191
- var_mean = 4.times.map { |m| @cov_matrix[m, m] }.to_numeric.mean
201
+ var_mean = Daru::Vector.new(4.times.map { |m| @cov_matrix[m, m] }).mean
192
202
  assert_in_delta(var_mean, @ia.variances_mean)
193
- assert_equal(@x1.mean, @ia.item_statistics['x1'][:mean])
194
- assert_equal(@x4.mean, @ia.item_statistics['x4'][:mean])
195
- assert_in_delta(@x1.sds, @ia.item_statistics['x1'][:sds], 1e-14)
196
- assert_in_delta(@x4.sds, @ia.item_statistics['x4'][:sds], 1e-14)
203
+ assert_equal(@x1.mean, @ia.item_statistics[:x1][:mean])
204
+ assert_equal(@x4.mean, @ia.item_statistics[:x4][:mean])
205
+ assert_in_delta(@x1.sds, @ia.item_statistics[:x1][:sds], 1e-14)
206
+ assert_in_delta(@x4.sds, @ia.item_statistics[:x4][:sds], 1e-14)
197
207
  ds2 = @ds.clone
198
- ds2.delete_vector('x1')
208
+ ds2.delete_vector(:x1)
199
209
  vector_sum = ds2.vector_sum
200
- assert_equal(vector_sum.mean, @ia.stats_if_deleted['x1'][:mean])
201
- assert_equal(vector_sum.sds, @ia.stats_if_deleted['x1'][:sds])
202
- assert_in_delta(vector_sum.variance, @ia.stats_if_deleted['x1'][:variance_sample], 1e-10)
210
+ assert_equal(vector_sum.mean, @ia.stats_if_deleted[:x1][:mean])
211
+ assert_equal(vector_sum.sds, @ia.stats_if_deleted[:x1][:sds])
212
+ assert_in_delta(vector_sum.variance, @ia.stats_if_deleted[:x1][:variance_sample], 1e-10)
203
213
 
204
- assert_equal(Statsample::Reliability.cronbach_alpha(ds2), @ia.stats_if_deleted['x1'][:alpha])
214
+ assert_equal(Statsample::Reliability.cronbach_alpha(ds2), @ia.stats_if_deleted[:x1][:alpha])
205
215
 
206
216
  covariances = []
207
217
  4.times.each {|i|
@@ -211,9 +221,9 @@ class StatsampleReliabilityTestCase < Minitest::Test
211
221
  end
212
222
  }
213
223
  }
214
- assert_in_delta(covariances.to_numeric.mean, @ia.covariances_mean)
215
- assert_in_delta(0.999, @ia.item_total_correlation['x1'], 0.001)
216
- assert_in_delta(1050.455, @ia.stats_if_deleted['x1'][:variance_sample], 0.001)
224
+ assert_in_delta(Daru::Vector.new(covariances).mean, @ia.covariances_mean)
225
+ assert_in_delta(0.999, @ia.item_total_correlation[:x1], 0.001)
226
+ assert_in_delta(1050.455, @ia.stats_if_deleted[:x1][:variance_sample], 0.001)
217
227
  end
218
228
  should 'return a summary' do
219
229
  assert(@ia.summary.size > 0)
@@ -5,11 +5,11 @@ $reliability_icc = nil
5
5
  class StatsampleReliabilityIccTestCase < Minitest::Test
6
6
  context Statsample::Reliability::ICC do
7
7
  setup do
8
- a = [9, 6, 8, 7, 10, 6].to_numeric
9
- b = [2, 1, 4, 1, 5, 2].to_numeric
10
- c = [5, 3, 6, 2, 6, 4].to_numeric
11
- d = [8, 2, 8, 6, 9, 7].to_numeric
12
- @ds = { 'a' => a, 'b' => b, 'c' => c, 'd' => d }.to_dataset
8
+ a = Daru::Vector.new([9, 6, 8, 7, 10, 6])
9
+ b = Daru::Vector.new([2, 1, 4, 1, 5, 2])
10
+ c = Daru::Vector.new([5, 3, 6, 2, 6, 4])
11
+ d = Daru::Vector.new([8, 2, 8, 6, 9, 7])
12
+ @ds = Daru::DataFrame.new({ :a => a, :b => b, :c => c, :d => d })
13
13
  @icc = Statsample::Reliability::ICC.new(@ds)
14
14
  end
15
15
  should 'basic method be correct' do
@@ -114,7 +114,7 @@ class StatsampleReliabilityIccTestCase < Minitest::Test
114
114
 
115
115
  begin
116
116
  require 'rserve'
117
- require 'statsample/rserve_extension'
117
+ require 'daru/extensions/rserve'
118
118
  context 'McGraw and Wong' do
119
119
  teardown do
120
120
  @r = $reliability_icc[:r].close unless $reliability_icc[:r].nil?
@@ -122,11 +122,11 @@ class StatsampleReliabilityIccTestCase < Minitest::Test
122
122
  setup do
123
123
  if $reliability_icc.nil?
124
124
  size = 100
125
- a = size.times.map { rand(10) }.to_numeric
125
+ a = Daru::Vector.new(size.times.map { rand(10) })
126
126
  b = a.recode { |i| i + rand(4) - 2 }
127
127
  c = a.recode { |i| i + rand(4) - 2 }
128
128
  d = a.recode { |i| i + rand(4) - 2 }
129
- @ds = { 'a' => a, 'b' => b, 'c' => c, 'd' => d }.to_dataset
129
+ @ds = Daru::DataFrame.new({ :a => a, :b => b, :c => c, :d => d })
130
130
 
131
131
  @icc = Statsample::Reliability::ICC.new(@ds)
132
132
  @r = Rserve::Connection.new