statsample 1.5.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (114) hide show
  1. checksums.yaml +4 -4
  2. data/.build.sh +15 -0
  3. data/.gitignore +1 -0
  4. data/.travis.yml +19 -7
  5. data/CONTRIBUTING.md +33 -0
  6. data/History.txt +5 -0
  7. data/README.md +41 -53
  8. data/benchmarks/correlation_matrix_15_variables.rb +6 -5
  9. data/benchmarks/correlation_matrix_5_variables.rb +6 -5
  10. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +23 -26
  11. data/examples/boxplot.rb +17 -5
  12. data/examples/correlation_matrix.rb +36 -7
  13. data/examples/dataset.rb +25 -5
  14. data/examples/dominance_analysis.rb +8 -7
  15. data/examples/dominance_analysis_bootstrap.rb +16 -11
  16. data/examples/histogram.rb +16 -2
  17. data/examples/icc.rb +5 -6
  18. data/examples/levene.rb +17 -3
  19. data/examples/multiple_regression.rb +6 -3
  20. data/examples/parallel_analysis.rb +11 -6
  21. data/examples/polychoric.rb +26 -13
  22. data/examples/principal_axis.rb +8 -4
  23. data/examples/reliability.rb +10 -10
  24. data/examples/scatterplot.rb +8 -0
  25. data/examples/t_test.rb +7 -0
  26. data/examples/u_test.rb +10 -2
  27. data/examples/vector.rb +9 -6
  28. data/examples/velicer_map_test.rb +12 -8
  29. data/lib/statsample.rb +13 -47
  30. data/lib/statsample/analysis/suite.rb +1 -1
  31. data/lib/statsample/anova/oneway.rb +6 -6
  32. data/lib/statsample/anova/twoway.rb +26 -24
  33. data/lib/statsample/bivariate.rb +78 -61
  34. data/lib/statsample/bivariate/pearson.rb +2 -2
  35. data/lib/statsample/codification.rb +45 -32
  36. data/lib/statsample/converter/csv.rb +15 -53
  37. data/lib/statsample/converter/spss.rb +6 -5
  38. data/lib/statsample/converters.rb +50 -211
  39. data/lib/statsample/crosstab.rb +26 -25
  40. data/lib/statsample/daru.rb +117 -0
  41. data/lib/statsample/dataset.rb +70 -942
  42. data/lib/statsample/dominanceanalysis.rb +16 -17
  43. data/lib/statsample/dominanceanalysis/bootstrap.rb +26 -28
  44. data/lib/statsample/factor/parallelanalysis.rb +17 -19
  45. data/lib/statsample/factor/pca.rb +21 -20
  46. data/lib/statsample/factor/principalaxis.rb +3 -3
  47. data/lib/statsample/graph/boxplot.rb +8 -16
  48. data/lib/statsample/graph/histogram.rb +4 -4
  49. data/lib/statsample/graph/scatterplot.rb +8 -7
  50. data/lib/statsample/histogram.rb +128 -119
  51. data/lib/statsample/matrix.rb +20 -16
  52. data/lib/statsample/multiset.rb +39 -38
  53. data/lib/statsample/regression.rb +3 -3
  54. data/lib/statsample/regression/multiple.rb +8 -10
  55. data/lib/statsample/regression/multiple/alglibengine.rb +96 -89
  56. data/lib/statsample/regression/multiple/baseengine.rb +32 -32
  57. data/lib/statsample/regression/multiple/gslengine.rb +33 -36
  58. data/lib/statsample/regression/multiple/matrixengine.rb +7 -9
  59. data/lib/statsample/regression/multiple/rubyengine.rb +39 -41
  60. data/lib/statsample/reliability.rb +23 -25
  61. data/lib/statsample/reliability/icc.rb +8 -7
  62. data/lib/statsample/reliability/multiscaleanalysis.rb +14 -12
  63. data/lib/statsample/reliability/scaleanalysis.rb +58 -60
  64. data/lib/statsample/reliability/skillscaleanalysis.rb +34 -29
  65. data/lib/statsample/resample.rb +1 -1
  66. data/lib/statsample/shorthand.rb +29 -25
  67. data/lib/statsample/test/kolmogorovsmirnov.rb +5 -3
  68. data/lib/statsample/test/levene.rb +28 -27
  69. data/lib/statsample/test/t.rb +7 -9
  70. data/lib/statsample/test/umannwhitney.rb +28 -28
  71. data/lib/statsample/test/wilcoxonsignedrank.rb +45 -43
  72. data/lib/statsample/vector.rb +70 -1013
  73. data/lib/statsample/version.rb +1 -1
  74. data/statsample.gemspec +12 -16
  75. data/test/helpers_tests.rb +1 -1
  76. data/test/test_analysis.rb +17 -17
  77. data/test/test_anova_contrast.rb +6 -6
  78. data/test/test_anovatwowaywithdataset.rb +8 -8
  79. data/test/test_anovawithvectors.rb +8 -8
  80. data/test/test_awesome_print_bug.rb +1 -1
  81. data/test/test_bartlettsphericity.rb +4 -4
  82. data/test/test_bivariate.rb +48 -43
  83. data/test/test_codification.rb +33 -33
  84. data/test/test_crosstab.rb +9 -9
  85. data/test/test_dataset.rb +28 -458
  86. data/test/test_factor.rb +46 -38
  87. data/test/test_factor_pa.rb +22 -13
  88. data/test/test_ggobi.rb +4 -4
  89. data/test/test_gsl.rb +4 -4
  90. data/test/test_histogram.rb +3 -3
  91. data/test/test_matrix.rb +13 -13
  92. data/test/test_multiset.rb +103 -91
  93. data/test/test_regression.rb +57 -52
  94. data/test/test_reliability.rb +55 -45
  95. data/test/test_reliability_icc.rb +8 -8
  96. data/test/test_reliability_skillscale.rb +26 -24
  97. data/test/test_resample.rb +1 -1
  98. data/test/test_statistics.rb +3 -13
  99. data/test/test_stest.rb +9 -9
  100. data/test/test_stratified.rb +3 -3
  101. data/test/test_test_t.rb +12 -12
  102. data/test/test_umannwhitney.rb +2 -2
  103. data/test/test_vector.rb +76 -613
  104. data/test/test_wilcoxonsignedrank.rb +4 -4
  105. metadata +57 -28
  106. data/lib/statsample/rserve_extension.rb +0 -20
  107. data/lib/statsample/vector/gsl.rb +0 -106
  108. data/test/fixtures/repeated_fields.csv +0 -7
  109. data/test/fixtures/scientific_notation.csv +0 -4
  110. data/test/fixtures/test_csv.csv +0 -7
  111. data/test/fixtures/test_xls.xls +0 -0
  112. data/test/test_csv.rb +0 -63
  113. data/test/test_rserve_extension.rb +0 -42
  114. data/test/test_xls.rb +0 -52
@@ -3,21 +3,21 @@ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
3
3
  class StatsampleRegressionTestCase < Minitest::Test
4
4
  context 'Example with missing data' do
5
5
  setup do
6
- @x = [0.285714285714286, 0.114285714285714, 0.314285714285714, 0.2, 0.2, 0.228571428571429, 0.2, 0.4, 0.714285714285714, 0.285714285714286, 0.285714285714286, 0.228571428571429, 0.485714285714286, 0.457142857142857, 0.257142857142857, 0.228571428571429, 0.285714285714286, 0.285714285714286, 0.285714285714286, 0.142857142857143, 0.285714285714286, 0.514285714285714, 0.485714285714286, 0.228571428571429, 0.285714285714286, 0.342857142857143, 0.285714285714286, 0.0857142857142857].to_numeric
6
+ @x = Daru::Vector.new([0.285714285714286, 0.114285714285714, 0.314285714285714, 0.2, 0.2, 0.228571428571429, 0.2, 0.4, 0.714285714285714, 0.285714285714286, 0.285714285714286, 0.228571428571429, 0.485714285714286, 0.457142857142857, 0.257142857142857, 0.228571428571429, 0.285714285714286, 0.285714285714286, 0.285714285714286, 0.142857142857143, 0.285714285714286, 0.514285714285714, 0.485714285714286, 0.228571428571429, 0.285714285714286, 0.342857142857143, 0.285714285714286, 0.0857142857142857])
7
7
 
8
- @y = [nil, 0.233333333333333, nil, 0.266666666666667, 0.366666666666667, nil, 0.333333333333333, 0.3, 0.666666666666667, 0.0333333333333333, 0.333333333333333, nil, nil, 0.533333333333333, 0.433333333333333, 0.4, 0.4, 0.5, 0.4, 0.266666666666667, 0.166666666666667, 0.666666666666667, 0.433333333333333, 0.166666666666667, nil, 0.4, 0.366666666666667, nil].to_numeric
9
- @ds = { 'x' => @x, 'y' => @y }.to_dataset
10
- @lr = Statsample::Regression::Multiple::RubyEngine.new(@ds, 'y')
8
+ @y = Daru::Vector.new([nil, 0.233333333333333, nil, 0.266666666666667, 0.366666666666667, nil, 0.333333333333333, 0.3, 0.666666666666667, 0.0333333333333333, 0.333333333333333, nil, nil, 0.533333333333333, 0.433333333333333, 0.4, 0.4, 0.5, 0.4, 0.266666666666667, 0.166666666666667, 0.666666666666667, 0.433333333333333, 0.166666666666667, nil, 0.4, 0.366666666666667, nil])
9
+ @ds = Daru::DataFrame.new({ :x => @x, :y => @y })
10
+ @lr = Statsample::Regression::Multiple::RubyEngine.new(@ds, :y)
11
11
  end
12
12
  should 'have correct values' do
13
13
  assert_in_delta(0.455, @lr.r2, 0.001)
14
14
  assert_in_delta(0.427, @lr.r2_adjusted, 0.001)
15
15
  assert_in_delta(0.1165, @lr.se_estimate, 0.001)
16
16
  assert_in_delta(15.925, @lr.f, 0.0001)
17
- assert_in_delta(0.675, @lr.standarized_coeffs['x'], 0.001)
18
- assert_in_delta(0.778, @lr.coeffs['x'], 0.001, 'coeff x')
17
+ assert_in_delta(0.675, @lr.standarized_coeffs[:x], 0.001)
18
+ assert_in_delta(0.778, @lr.coeffs[:x], 0.001, 'coeff x')
19
19
  assert_in_delta(0.132, @lr.constant, 0.001, 'constant')
20
- assert_in_delta(0.195, @lr.coeffs_se['x'], 0.001, 'coeff x se')
20
+ assert_in_delta(0.195, @lr.coeffs_se[:x], 0.001, 'coeff x se')
21
21
  assert_in_delta(0.064, @lr.constant_se, 0.001, 'constant se')
22
22
  end
23
23
  end
@@ -26,24 +26,24 @@ class StatsampleRegressionTestCase < Minitest::Test
26
26
 
27
27
  a, b = rand, rand
28
28
 
29
- x1 = samples.times.map { rand }.to_numeric
30
- x2 = samples.times.map { rand }.to_numeric
31
- x3 = samples.times.map { |i| x1[i] * (1 + a) + x2[i] * (1 + b) }.to_numeric
32
- y = samples.times.map { |i| x1[i] + x2[i] + x3[i] + rand }.to_numeric
29
+ x1 = Daru::Vector.new(samples.times.map { rand })
30
+ x2 = Daru::Vector.new(samples.times.map { rand })
31
+ x3 = Daru::Vector.new(samples.times.map { |i| x1[i] * (1 + a) + x2[i] * (1 + b) })
32
+ y = Daru::Vector.new(samples.times.map { |i| x1[i] + x2[i] + x3[i] + rand })
33
33
 
34
- ds = { 'x1' => x1, 'x2' => x2, 'x3' => x3, 'y' => y }.to_dataset
34
+ ds = Daru::DataFrame.new({ :x1 => x1, :x2 => x2, :x3 => x3, :y => y })
35
35
 
36
36
  assert_raise(Statsample::Regression::LinearDependency) {
37
- Statsample::Regression::Multiple::RubyEngine.new(ds, 'y')
37
+ Statsample::Regression::Multiple::RubyEngine.new(ds, :y)
38
38
  }
39
39
  end
40
40
  def test_parameters
41
- @x = [13, 20, 10, 33, 15].to_vector(:numeric)
42
- @y = [23, 18, 35, 10, 27].to_vector(:numeric)
41
+ @x =Daru::Vector.new([13, 20, 10, 33, 15])
42
+ @y =Daru::Vector.new([23, 18, 35, 10, 27])
43
43
  reg = Statsample::Regression::Simple.new_from_vectors(@x, @y)
44
44
  _test_simple_regression(reg)
45
- ds = { 'x' => @x, 'y' => @y }.to_dataset
46
- reg = Statsample::Regression::Simple.new_from_dataset(ds, 'x', 'y')
45
+ ds = Daru::DataFrame.new({ :x => @x, :y => @y })
46
+ reg = Statsample::Regression::Simple.new_from_dataset(ds, :x, :y)
47
47
  _test_simple_regression(reg)
48
48
  reg = Statsample::Regression.simple(@x, @y)
49
49
  _test_simple_regression(reg)
@@ -57,11 +57,11 @@ class StatsampleRegressionTestCase < Minitest::Test
57
57
  end
58
58
 
59
59
  def test_summaries
60
- a = 10.times.map { rand(100) }.to_numeric
61
- b = 10.times.map { rand(100) }.to_numeric
62
- y = 10.times.map { rand(100) }.to_numeric
63
- ds = { 'a' => a, 'b' => b, 'y' => y }.to_dataset
64
- lr = Statsample::Regression::Multiple::RubyEngine.new(ds, 'y')
60
+ a = Daru::Vector.new(10.times.map { rand(100) })
61
+ b = Daru::Vector.new(10.times.map { rand(100) })
62
+ y = Daru::Vector.new(10.times.map { rand(100) })
63
+ ds = Daru::DataFrame.new({ :a => a, :b => b, :y => y })
64
+ lr = Statsample::Regression::Multiple::RubyEngine.new(ds, :y)
65
65
  assert(lr.summary.size > 0)
66
66
  end
67
67
 
@@ -87,12 +87,12 @@ class StatsampleRegressionTestCase < Minitest::Test
87
87
  end
88
88
 
89
89
  def test_multiple_regression_pairwise_2
90
- @a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 3, nil, 3, nil, 3].to_vector(:numeric)
91
- @b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4, 2, 2, nil, 6, 2].to_vector(:numeric)
92
- @c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100, nil, 3, 7, nil, 7].to_vector(:numeric)
93
- @y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 30, 40, nil, 50, nil].to_vector(:numeric)
94
- ds = { 'a' => @a, 'b' => @b, 'c' => @c, 'y' => @y }.to_dataset
95
- lr = Statsample::Regression::Multiple::RubyEngine.new(ds, 'y')
90
+ @a =Daru::Vector.new( [1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 3, nil, 3, nil, 3])
91
+ @b =Daru::Vector.new( [3, 3, 4, 4, 5, 5, 6, 6, 4, 4, 2, 2, nil, 6, 2])
92
+ @c =Daru::Vector.new( [11, 22, 30, 40, 50, 65, 78, 79, 99, 100, nil, 3, 7, nil, 7])
93
+ @y =Daru::Vector.new( [3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 30, 40, nil, 50, nil])
94
+ ds = Daru::DataFrame.new({ :a => @a, :b => @b, :c => @c, :y => @y })
95
+ lr = Statsample::Regression::Multiple::RubyEngine.new(ds, :y)
96
96
  assert_in_delta(2407.436, lr.sst, 0.001)
97
97
  assert_in_delta(0.752, lr.r, 0.001, 'pairwise r')
98
98
  assert_in_delta(0.565, lr.r2, 0.001)
@@ -103,12 +103,12 @@ class StatsampleRegressionTestCase < Minitest::Test
103
103
 
104
104
  def test_multiple_regression_gsl
105
105
  if Statsample.has_gsl?
106
- @a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7].to_vector(:numeric)
107
- @b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4].to_vector(:numeric)
108
- @c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100].to_vector(:numeric)
109
- @y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30].to_vector(:numeric)
110
- ds = { 'a' => @a, 'b' => @b, 'c' => @c, 'y' => @y }.to_dataset
111
- lr = Statsample::Regression::Multiple::GslEngine.new(ds, 'y')
106
+ @a =Daru::Vector.new( [1, 3, 2, 4, 3, 5, 4, 6, 5, 7])
107
+ @b =Daru::Vector.new( [3, 3, 4, 4, 5, 5, 6, 6, 4, 4])
108
+ @c =Daru::Vector.new( [11, 22, 30, 40, 50, 65, 78, 79, 99, 100])
109
+ @y =Daru::Vector.new( [3, 4, 5, 6, 7, 8, 9, 10, 20, 30])
110
+ ds = Daru::DataFrame.new({ :a => @a, :b => @b, :c => @c, :y => @y })
111
+ lr = Statsample::Regression::Multiple::GslEngine.new(ds, :y)
112
112
  assert(lr.summary.size > 0)
113
113
  model_test(lr, 'gsl')
114
114
  predicted = [1.7857, 6.0989, 3.2433, 7.2908, 4.9667, 10.3428, 8.8158, 10.4717, 23.6639, 25.3198]
@@ -127,8 +127,8 @@ class StatsampleRegressionTestCase < Minitest::Test
127
127
  end
128
128
 
129
129
  def model_test_matrix(lr, name = 'undefined')
130
- stan_coeffs = { 'a' => 0.151, 'b' => -0.547, 'c' => 0.997 }
131
- unstan_coeffs = { 'a' => 0.695, 'b' => -4.286, 'c' => 0.266 }
130
+ stan_coeffs = { :a => 0.151, :b => -0.547, :c => 0.997 }
131
+ unstan_coeffs = { :a => 0.695, :b => -4.286, :c => 0.266 }
132
132
 
133
133
  unstan_coeffs.each_key{|k|
134
134
  assert_in_delta(unstan_coeffs[k], lr.coeffs[k], 0.001, "b coeffs - #{name}")
@@ -145,15 +145,15 @@ class StatsampleRegressionTestCase < Minitest::Test
145
145
 
146
146
  assert_in_delta(20.908, lr.f, 0.001)
147
147
  assert_in_delta(0.001, lr.probability, 0.001)
148
- assert_in_delta(0.226, lr.tolerance('a'), 0.001)
148
+ assert_in_delta(0.226, lr.tolerance(:a), 0.001)
149
149
 
150
- coeffs_se = { 'a' => 1.171, 'b' => 1.129, 'c' => 0.072 }
150
+ coeffs_se = { :a => 1.171, :b => 1.129, :c => 0.072 }
151
151
 
152
152
  ccoeffs_se = lr.coeffs_se
153
153
  coeffs_se.each_key{|k|
154
154
  assert_in_delta(coeffs_se[k], ccoeffs_se[k], 0.001)
155
155
  }
156
- coeffs_t = { 'a' => 0.594, 'b' => -3.796, 'c' => 3.703 }
156
+ coeffs_t = { :a => 0.594, :b => -3.796, :c => 3.703 }
157
157
  ccoeffs_t = lr.coeffs_t
158
158
  coeffs_t.each_key{|k|
159
159
  assert_in_delta(coeffs_t[k], ccoeffs_t[k], 0.001)
@@ -174,32 +174,37 @@ class StatsampleRegressionTestCase < Minitest::Test
174
174
  end
175
175
 
176
176
  def test_regression_matrix
177
- @a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7].to_vector(:numeric)
178
- @b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4].to_vector(:numeric)
179
- @c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100].to_vector(:numeric)
180
- @y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30].to_vector(:numeric)
181
- ds = { 'a' => @a, 'b' => @b, 'c' => @c, 'y' => @y }.to_dataset
177
+ @a = Daru::Vector.new([1, 3, 2, 4, 3, 5, 4, 6, 5, 7])
178
+ @b = Daru::Vector.new([3, 3, 4, 4, 5, 5, 6, 6, 4, 4])
179
+ @c = Daru::Vector.new([11, 22, 30, 40, 50, 65, 78, 79, 99, 100])
180
+ @y = Daru::Vector.new([3, 4, 5, 6, 7, 8, 9, 10, 20, 30])
181
+ ds = Daru::DataFrame.new({ :a => @a, :b => @b, :c => @c, :y => @y })
182
182
  cor = Statsample::Bivariate.correlation_matrix(ds)
183
183
 
184
- lr = Statsample::Regression::Multiple::MatrixEngine.new(cor, 'y', y_mean: @y.mean, x_mean: { 'a' => ds['a'].mean, 'b' => ds['b'].mean, 'c' => ds['c'].mean }, cases: @a.size, y_sd: @y.sd, x_sd: { 'a' => @a.sd, 'b' => @b.sd, 'c' => @c.sd })
184
+ lr = Statsample::Regression::Multiple::MatrixEngine.new(
185
+ cor, :y, y_mean: @y.mean,
186
+ x_mean: { :a => ds[:a].mean, :b => ds[:b].mean, :c => ds[:c].mean },
187
+ cases: @a.size, y_sd: @y.sd, x_sd: { :a => @a.sd, :b => @b.sd, :c => @c.sd })
185
188
  assert_nil(lr.constant_se)
186
189
  assert_nil(lr.constant_t)
187
190
  model_test_matrix(lr, 'correlation matrix')
188
191
 
189
192
  covariance = Statsample::Bivariate.covariance_matrix(ds)
190
- lr = Statsample::Regression::Multiple::MatrixEngine.new(covariance, 'y', y_mean: @y.mean, x_mean: { 'a' => ds['a'].mean, 'b' => ds['b'].mean, 'c' => ds['c'].mean }, cases: @a.size)
193
+ lr = Statsample::Regression::Multiple::MatrixEngine.new(
194
+ covariance, :y, y_mean: @y.mean,
195
+ x_mean: { :a => ds[:a].mean, :b => ds[:b].mean, :c => ds[:c].mean }, cases: @a.size)
191
196
  assert(lr.summary.size > 0)
192
197
 
193
198
  model_test(lr, 'covariance matrix')
194
199
  end
195
200
 
196
201
  def test_regression_rubyengine
197
- @a = [nil, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7].to_vector(:numeric)
198
- @b = [nil, 3, 3, 4, 4, 5, 5, 6, 6, 4, 4].to_vector(:numeric)
199
- @c = [nil, 11, 22, 30, 40, 50, 65, 78, 79, 99, 100].to_vector(:numeric)
200
- @y = [nil, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30].to_vector(:numeric)
201
- ds = { 'a' => @a, 'b' => @b, 'c' => @c, 'y' => @y }.to_dataset
202
- lr = Statsample::Regression::Multiple::RubyEngine.new(ds, 'y')
202
+ @a = Daru::Vector.new([nil, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7])
203
+ @b = Daru::Vector.new([nil, 3, 3, 4, 4, 5, 5, 6, 6, 4, 4])
204
+ @c = Daru::Vector.new([nil, 11, 22, 30, 40, 50, 65, 78, 79, 99, 100])
205
+ @y = Daru::Vector.new([nil, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30])
206
+ ds = Daru::DataFrame.new({ :a => @a, :b => @b, :c => @c, :y => @y })
207
+ lr = Statsample::Regression::Multiple::RubyEngine.new(ds, :y)
203
208
  assert_equal(11, lr.total_cases)
204
209
  assert_equal(10, lr.valid_cases)
205
210
  model_test(lr, 'rubyengine with missing data')
@@ -1,6 +1,14 @@
1
1
  require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
2
  class StatsampleReliabilityTestCase < Minitest::Test
3
3
  context Statsample::Reliability do
4
+ setup do
5
+ Daru.lazy_update = true
6
+ end
7
+
8
+ teardown do
9
+ Daru.lazy_update = false
10
+ end
11
+
4
12
  should 'return correct r according to Spearman-Brown prophecy' do
5
13
  r = 0.6849
6
14
  n = 62.quo(15)
@@ -15,26 +23,27 @@ class StatsampleReliabilityTestCase < Minitest::Test
15
23
  setup do
16
24
  @samples = 40
17
25
  @n_variables = rand(10) + 2
18
- @ds = Statsample::Dataset.new
19
- base = @samples.times.collect { |_a| rand }.to_numeric
26
+ @ds = Daru::DataFrame.new({}, index: @samples)
27
+ base = Daru::Vector.new(@samples.times.collect { |_a| rand })
20
28
  @n_variables.times do |i|
21
- @ds[i] = base.collect { |v| v + rand }.to_numeric
29
+ @ds[i] = Daru::Vector.new(base.collect { |v| v + rand })
22
30
  end
23
31
 
24
- @ds.update_valid_data
25
- @k = @ds.fields.size
32
+ @ds.update
33
+ @k = @ds.ncols
26
34
  @cm = Statsample::Bivariate.covariance_matrix(@ds)
27
35
  @dse = @ds.dup
28
- @dse.fields.each do |f|
29
- @dse[f] = @dse[f].standarized
36
+ @dse.vectors.each do |f|
37
+ @dse[f] = @dse[f].standardize
30
38
  end
39
+ @dse.update
31
40
  @cme = Statsample::Bivariate.covariance_matrix(@dse)
32
41
  @a = Statsample::Reliability.cronbach_alpha(@ds)
33
42
  @as = Statsample::Reliability.cronbach_alpha_standarized(@ds)
34
43
  end
35
44
  should 'alpha will be equal to sum of matrix covariance less the individual variances' do
36
45
  total_sum = @cm.total_sum
37
- ind_var = @ds.fields.inject(0) { |ac, v| ac + @ds[v].variance }
46
+ ind_var = @ds.vectors.to_a.inject(0) { |ac, v| ac + @ds[v].variance }
38
47
  expected = @k.quo(@k - 1) * (1 - (ind_var.quo(total_sum)))
39
48
  assert_in_delta(expected, @a, 1e-10)
40
49
  end
@@ -57,7 +66,7 @@ class StatsampleReliabilityTestCase < Minitest::Test
57
66
 
58
67
  should 'standarized alpha will be equal to sum of matrix covariance less the individual variances on standarized values' do
59
68
  total_sum = @cme.total_sum
60
- ind_var = @dse.fields.inject(0) { |ac, v| ac + @dse[v].variance }
69
+ ind_var = @dse.vectors.to_a.inject(0) { |ac, v| ac + @dse[v].variance }
61
70
  expected = @k.quo(@k - 1) * (1 - (ind_var.quo(total_sum)))
62
71
  assert_in_delta(expected, @as, 1e-10)
63
72
  end
@@ -67,31 +76,31 @@ class StatsampleReliabilityTestCase < Minitest::Test
67
76
  @samples = 100
68
77
  @points = rand(10) + 3
69
78
  @max_point = (@points - 1) * 3
70
- @x1 = @samples.times.map { rand(@points) }.to_numeric
71
- @x2 = @samples.times.map { rand(@points) }.to_numeric
72
- @x3 = @samples.times.map { rand(@points) }.to_numeric
73
- @ds = { 'a' => @x1, 'b' => @x2, 'c' => @x3 }.to_dataset
79
+ @x1 = Daru::Vector.new(@samples.times.map { rand(@points) })
80
+ @x2 = Daru::Vector.new(@samples.times.map { rand(@points) })
81
+ @x3 = Daru::Vector.new(@samples.times.map { rand(@points) })
82
+ @ds = Daru::DataFrame.new({ :a => @x1, :b => @x2, :c => @x3 })
74
83
  @icc = Statsample::Reliability::ItemCharacteristicCurve.new(@ds)
75
84
  end
76
85
  should 'have a correct automatic vector_total' do
77
86
  assert_equal(@ds.vector_sum, @icc.vector_total)
78
87
  end
79
88
  should 'have a correct different vector_total' do
80
- x2 = @samples.times.map { rand(10) }.to_numeric
89
+ x2 = Daru::Vector.new(@samples.times.map { rand(10) })
81
90
  @icc = Statsample::Reliability::ItemCharacteristicCurve.new(@ds, x2)
82
91
  assert_equal(x2, @icc.vector_total)
83
92
  assert_raises(ArgumentError) do
84
- inc = (@samples + 10).times.map { rand(10) }.to_numeric
93
+ inc = Daru::Vector.new((@samples + 10).times.map { rand(10) })
85
94
  @icc = Statsample::Reliability::ItemCharacteristicCurve.new(@ds, inc)
86
95
  end
87
96
  end
88
97
  should 'have 0% for 0 points on maximum value values' do
89
- max = @icc.curve_field('a', 0)[@max_point.to_f]
98
+ max = @icc.curve_field(:a, 0)[@max_point.to_f]
90
99
  max ||= 0
91
100
  assert_in_delta(0, max)
92
101
  end
93
102
  should 'have 0 for max value on minimum value' do
94
- max = @icc.curve_field('a', @max_point)[0.0]
103
+ max = @icc.curve_field(:a, @max_point)[0.0]
95
104
  max ||= 0
96
105
  assert_in_delta(0, max)
97
106
  end
@@ -107,7 +116,7 @@ class StatsampleReliabilityTestCase < Minitest::Test
107
116
  expected = total.each {|k, v|
108
117
  total[k] = v.quo(total_g[k])
109
118
  }
110
- assert_equal(expected, @icc.curve_field('a', index))
119
+ assert_equal(expected, @icc.curve_field(:a, index))
111
120
  end
112
121
  end
113
122
 
@@ -119,33 +128,34 @@ class StatsampleReliabilityTestCase < Minitest::Test
119
128
  h = {}
120
129
  @scales.times {|s|
121
130
  @items_per_scale.times {|i|
122
- h["#{s}_#{i}"] = (size.times.map { (s * 2) + rand }).to_numeric
131
+ h["#{s}_#{i}".to_sym] = Daru::Vector.new((size.times.map { (s * 2) + rand }))
123
132
  }
124
133
  }
125
- @ds = h.to_dataset
134
+ @ds = Daru::DataFrame.new(h)
126
135
  @msa = Statsample::Reliability::MultiScaleAnalysis.new(name: 'Multiple Analysis') do |m|
127
136
  m.scale 'complete', @ds
128
137
  @scales.times {|s|
129
- m.scale "scale_#{s}", @ds.clone(@items_per_scale.times.map { |i| "#{s}_#{i}" }), name: "Scale #{s}"
138
+ m.scale "scale_#{s}", @ds.clone(*@items_per_scale.times.map { |i| "#{s}_#{i}".to_sym }), name: "Scale #{s}"
130
139
  }
131
140
  end
132
141
  end
142
+
133
143
  should 'Retrieve correct ScaleAnalysis for whole scale' do
134
144
  sa = Statsample::Reliability::ScaleAnalysis.new(@ds, name: 'Scale complete')
135
145
  assert_equal(sa.variances_mean, @msa.scale('complete').variances_mean)
136
146
  end
137
147
  should 'Retrieve correct ScaleAnalysis for each scale' do
138
148
  @scales.times {|s|
139
- sa = Statsample::Reliability::ScaleAnalysis.new(@ds.dup(@items_per_scale.times.map { |i| "#{s}_#{i}" }), name: "Scale #{s}")
149
+ sa = Statsample::Reliability::ScaleAnalysis.new(@ds.dup(@items_per_scale.times.map { |i| "#{s}_#{i}".to_sym }), name: "Scale #{s}")
140
150
  assert_equal(sa.variances_mean, @msa.scale("scale_#{s}").variances_mean)
141
151
  }
142
152
  end
143
153
  should 'retrieve correct correlation matrix for each scale' do
144
- vectors = { 'complete' => @ds.vector_sum }
154
+ vectors = { :complete => @ds.vector_sum }
145
155
  @scales.times {|s|
146
- vectors["scale_#{s}"] = @ds.dup(@items_per_scale.times.map { |i| "#{s}_#{i}" }).vector_sum
156
+ vectors["scale_#{s}".to_sym] = @ds.dup(@items_per_scale.times.map { |i| "#{s}_#{i}".to_sym }).vector_sum
147
157
  }
148
- ds2 = vectors.to_dataset
158
+ ds2 = Daru::DataFrame.new(vectors)
149
159
  assert_equal(Statsample::Bivariate.correlation_matrix(ds2), @msa.correlation_matrix)
150
160
  end
151
161
  should 'delete scale using delete_scale' do
@@ -156,9 +166,9 @@ class StatsampleReliabilityTestCase < Minitest::Test
156
166
  @msa.delete_scale('complete')
157
167
  vectors = {}
158
168
  @scales.times {|s|
159
- vectors["scale_#{s}"] = @ds.dup(@items_per_scale.times.map { |i| "#{s}_#{i}" }).vector_sum
169
+ vectors["scale_#{s}".to_sym] = @ds.dup(@items_per_scale.times.map { |i| "#{s}_#{i}".to_sym }).vector_sum
160
170
  }
161
- ds2 = vectors.to_dataset
171
+ ds2 = Daru::DataFrame.new(vectors)
162
172
  cor_matrix = Statsample::Bivariate.correlation_matrix(ds2)
163
173
  m = 3
164
174
  pca = Statsample::Factor::PCA.new(cor_matrix, m: m)
@@ -177,31 +187,31 @@ class StatsampleReliabilityTestCase < Minitest::Test
177
187
  end
178
188
  context Statsample::Reliability::ScaleAnalysis do
179
189
  setup do
180
- @x1 = [1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 30].to_numeric
181
- @x2 = [1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 50].to_numeric
182
- @x3 = [2, 2, 1, 1, 1, 2, 2, 2, 3, 4, 5, 40].to_numeric
183
- @x4 = [1, 2, 3, 4, 4, 4, 4, 3, 4, 4, 5, 30].to_numeric
184
- @ds = { 'x1' => @x1, 'x2' => @x2, 'x3' => @x3, 'x4' => @x4 }.to_dataset
190
+ @x1 = Daru::Vector.new([1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 30])
191
+ @x2 = Daru::Vector.new([1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 50])
192
+ @x3 = Daru::Vector.new([2, 2, 1, 1, 1, 2, 2, 2, 3, 4, 5, 40])
193
+ @x4 = Daru::Vector.new([1, 2, 3, 4, 4, 4, 4, 3, 4, 4, 5, 30])
194
+ @ds = Daru::DataFrame.new({ :x1 => @x1, :x2 => @x2, :x3 => @x3, :x4 => @x4 })
185
195
  @ia = Statsample::Reliability::ScaleAnalysis.new(@ds)
186
196
  @cov_matrix = @ia.cov_m
187
197
  end
188
198
  should 'return correct values for item analysis' do
189
199
  assert_in_delta(0.980, @ia.alpha, 0.001)
190
200
  assert_in_delta(0.999, @ia.alpha_standarized, 0.001)
191
- var_mean = 4.times.map { |m| @cov_matrix[m, m] }.to_numeric.mean
201
+ var_mean = Daru::Vector.new(4.times.map { |m| @cov_matrix[m, m] }).mean
192
202
  assert_in_delta(var_mean, @ia.variances_mean)
193
- assert_equal(@x1.mean, @ia.item_statistics['x1'][:mean])
194
- assert_equal(@x4.mean, @ia.item_statistics['x4'][:mean])
195
- assert_in_delta(@x1.sds, @ia.item_statistics['x1'][:sds], 1e-14)
196
- assert_in_delta(@x4.sds, @ia.item_statistics['x4'][:sds], 1e-14)
203
+ assert_equal(@x1.mean, @ia.item_statistics[:x1][:mean])
204
+ assert_equal(@x4.mean, @ia.item_statistics[:x4][:mean])
205
+ assert_in_delta(@x1.sds, @ia.item_statistics[:x1][:sds], 1e-14)
206
+ assert_in_delta(@x4.sds, @ia.item_statistics[:x4][:sds], 1e-14)
197
207
  ds2 = @ds.clone
198
- ds2.delete_vector('x1')
208
+ ds2.delete_vector(:x1)
199
209
  vector_sum = ds2.vector_sum
200
- assert_equal(vector_sum.mean, @ia.stats_if_deleted['x1'][:mean])
201
- assert_equal(vector_sum.sds, @ia.stats_if_deleted['x1'][:sds])
202
- assert_in_delta(vector_sum.variance, @ia.stats_if_deleted['x1'][:variance_sample], 1e-10)
210
+ assert_equal(vector_sum.mean, @ia.stats_if_deleted[:x1][:mean])
211
+ assert_equal(vector_sum.sds, @ia.stats_if_deleted[:x1][:sds])
212
+ assert_in_delta(vector_sum.variance, @ia.stats_if_deleted[:x1][:variance_sample], 1e-10)
203
213
 
204
- assert_equal(Statsample::Reliability.cronbach_alpha(ds2), @ia.stats_if_deleted['x1'][:alpha])
214
+ assert_equal(Statsample::Reliability.cronbach_alpha(ds2), @ia.stats_if_deleted[:x1][:alpha])
205
215
 
206
216
  covariances = []
207
217
  4.times.each {|i|
@@ -211,9 +221,9 @@ class StatsampleReliabilityTestCase < Minitest::Test
211
221
  end
212
222
  }
213
223
  }
214
- assert_in_delta(covariances.to_numeric.mean, @ia.covariances_mean)
215
- assert_in_delta(0.999, @ia.item_total_correlation['x1'], 0.001)
216
- assert_in_delta(1050.455, @ia.stats_if_deleted['x1'][:variance_sample], 0.001)
224
+ assert_in_delta(Daru::Vector.new(covariances).mean, @ia.covariances_mean)
225
+ assert_in_delta(0.999, @ia.item_total_correlation[:x1], 0.001)
226
+ assert_in_delta(1050.455, @ia.stats_if_deleted[:x1][:variance_sample], 0.001)
217
227
  end
218
228
  should 'return a summary' do
219
229
  assert(@ia.summary.size > 0)
@@ -5,11 +5,11 @@ $reliability_icc = nil
5
5
  class StatsampleReliabilityIccTestCase < Minitest::Test
6
6
  context Statsample::Reliability::ICC do
7
7
  setup do
8
- a = [9, 6, 8, 7, 10, 6].to_numeric
9
- b = [2, 1, 4, 1, 5, 2].to_numeric
10
- c = [5, 3, 6, 2, 6, 4].to_numeric
11
- d = [8, 2, 8, 6, 9, 7].to_numeric
12
- @ds = { 'a' => a, 'b' => b, 'c' => c, 'd' => d }.to_dataset
8
+ a = Daru::Vector.new([9, 6, 8, 7, 10, 6])
9
+ b = Daru::Vector.new([2, 1, 4, 1, 5, 2])
10
+ c = Daru::Vector.new([5, 3, 6, 2, 6, 4])
11
+ d = Daru::Vector.new([8, 2, 8, 6, 9, 7])
12
+ @ds = Daru::DataFrame.new({ :a => a, :b => b, :c => c, :d => d })
13
13
  @icc = Statsample::Reliability::ICC.new(@ds)
14
14
  end
15
15
  should 'basic method be correct' do
@@ -114,7 +114,7 @@ class StatsampleReliabilityIccTestCase < Minitest::Test
114
114
 
115
115
  begin
116
116
  require 'rserve'
117
- require 'statsample/rserve_extension'
117
+ require 'daru/extensions/rserve'
118
118
  context 'McGraw and Wong' do
119
119
  teardown do
120
120
  @r = $reliability_icc[:r].close unless $reliability_icc[:r].nil?
@@ -122,11 +122,11 @@ class StatsampleReliabilityIccTestCase < Minitest::Test
122
122
  setup do
123
123
  if $reliability_icc.nil?
124
124
  size = 100
125
- a = size.times.map { rand(10) }.to_numeric
125
+ a = Daru::Vector.new(size.times.map { rand(10) })
126
126
  b = a.recode { |i| i + rand(4) - 2 }
127
127
  c = a.recode { |i| i + rand(4) - 2 }
128
128
  d = a.recode { |i| i + rand(4) - 2 }
129
- @ds = { 'a' => a, 'b' => b, 'c' => c, 'd' => d }.to_dataset
129
+ @ds = Daru::DataFrame.new({ :a => a, :b => b, :c => c, :d => d })
130
130
 
131
131
  @icc = Statsample::Reliability::ICC.new(@ds)
132
132
  @r = Rserve::Connection.new