statsample-ekatena 2.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (156) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.travis.yml +23 -0
  4. data/CONTRIBUTING.md +17 -0
  5. data/Gemfile +2 -0
  6. data/History.txt +457 -0
  7. data/LICENSE.txt +12 -0
  8. data/README.md +175 -0
  9. data/Rakefile +44 -0
  10. data/benchmarks/correlation_matrix_15_variables.rb +32 -0
  11. data/benchmarks/correlation_matrix_5_variables.rb +33 -0
  12. data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
  13. data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
  14. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +71 -0
  15. data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
  16. data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
  17. data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
  18. data/benchmarks/correlation_matrix_methods/results.ds +0 -0
  19. data/benchmarks/factor_map.rb +37 -0
  20. data/benchmarks/helpers_benchmark.rb +5 -0
  21. data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
  22. data/doc_latex/manual/equations.tex +78 -0
  23. data/examples/boxplot.rb +28 -0
  24. data/examples/chisquare_test.rb +23 -0
  25. data/examples/correlation_matrix.rb +32 -0
  26. data/examples/dataset.rb +30 -0
  27. data/examples/dominance_analysis.rb +33 -0
  28. data/examples/dominance_analysis_bootstrap.rb +32 -0
  29. data/examples/histogram.rb +26 -0
  30. data/examples/icc.rb +24 -0
  31. data/examples/levene.rb +29 -0
  32. data/examples/multiple_regression.rb +20 -0
  33. data/examples/multivariate_correlation.rb +33 -0
  34. data/examples/parallel_analysis.rb +40 -0
  35. data/examples/polychoric.rb +40 -0
  36. data/examples/principal_axis.rb +26 -0
  37. data/examples/reliability.rb +31 -0
  38. data/examples/scatterplot.rb +25 -0
  39. data/examples/t_test.rb +27 -0
  40. data/examples/tetrachoric.rb +17 -0
  41. data/examples/u_test.rb +24 -0
  42. data/examples/vector.rb +20 -0
  43. data/examples/velicer_map_test.rb +46 -0
  44. data/grab_references.rb +29 -0
  45. data/lib/spss.rb +134 -0
  46. data/lib/statsample-ekatena/analysis.rb +100 -0
  47. data/lib/statsample-ekatena/analysis/suite.rb +89 -0
  48. data/lib/statsample-ekatena/analysis/suitereportbuilder.rb +44 -0
  49. data/lib/statsample-ekatena/anova.rb +24 -0
  50. data/lib/statsample-ekatena/anova/contrast.rb +79 -0
  51. data/lib/statsample-ekatena/anova/oneway.rb +187 -0
  52. data/lib/statsample-ekatena/anova/twoway.rb +207 -0
  53. data/lib/statsample-ekatena/bivariate.rb +406 -0
  54. data/lib/statsample-ekatena/bivariate/pearson.rb +54 -0
  55. data/lib/statsample-ekatena/codification.rb +182 -0
  56. data/lib/statsample-ekatena/converter/csv.rb +28 -0
  57. data/lib/statsample-ekatena/converter/spss.rb +48 -0
  58. data/lib/statsample-ekatena/converters.rb +211 -0
  59. data/lib/statsample-ekatena/crosstab.rb +188 -0
  60. data/lib/statsample-ekatena/daru.rb +115 -0
  61. data/lib/statsample-ekatena/dataset.rb +10 -0
  62. data/lib/statsample-ekatena/dominanceanalysis.rb +425 -0
  63. data/lib/statsample-ekatena/dominanceanalysis/bootstrap.rb +232 -0
  64. data/lib/statsample-ekatena/factor.rb +104 -0
  65. data/lib/statsample-ekatena/factor/map.rb +124 -0
  66. data/lib/statsample-ekatena/factor/parallelanalysis.rb +166 -0
  67. data/lib/statsample-ekatena/factor/pca.rb +242 -0
  68. data/lib/statsample-ekatena/factor/principalaxis.rb +243 -0
  69. data/lib/statsample-ekatena/factor/rotation.rb +198 -0
  70. data/lib/statsample-ekatena/formula/fit_model.rb +46 -0
  71. data/lib/statsample-ekatena/formula/formula.rb +306 -0
  72. data/lib/statsample-ekatena/graph.rb +11 -0
  73. data/lib/statsample-ekatena/graph/boxplot.rb +236 -0
  74. data/lib/statsample-ekatena/graph/histogram.rb +198 -0
  75. data/lib/statsample-ekatena/graph/scatterplot.rb +213 -0
  76. data/lib/statsample-ekatena/histogram.rb +180 -0
  77. data/lib/statsample-ekatena/matrix.rb +329 -0
  78. data/lib/statsample-ekatena/multiset.rb +310 -0
  79. data/lib/statsample-ekatena/regression.rb +65 -0
  80. data/lib/statsample-ekatena/regression/multiple.rb +89 -0
  81. data/lib/statsample-ekatena/regression/multiple/alglibengine.rb +128 -0
  82. data/lib/statsample-ekatena/regression/multiple/baseengine.rb +251 -0
  83. data/lib/statsample-ekatena/regression/multiple/gslengine.rb +129 -0
  84. data/lib/statsample-ekatena/regression/multiple/matrixengine.rb +205 -0
  85. data/lib/statsample-ekatena/regression/multiple/rubyengine.rb +86 -0
  86. data/lib/statsample-ekatena/regression/simple.rb +121 -0
  87. data/lib/statsample-ekatena/reliability.rb +150 -0
  88. data/lib/statsample-ekatena/reliability/icc.rb +415 -0
  89. data/lib/statsample-ekatena/reliability/multiscaleanalysis.rb +181 -0
  90. data/lib/statsample-ekatena/reliability/scaleanalysis.rb +233 -0
  91. data/lib/statsample-ekatena/reliability/skillscaleanalysis.rb +114 -0
  92. data/lib/statsample-ekatena/resample.rb +15 -0
  93. data/lib/statsample-ekatena/shorthand.rb +125 -0
  94. data/lib/statsample-ekatena/srs.rb +169 -0
  95. data/lib/statsample-ekatena/test.rb +82 -0
  96. data/lib/statsample-ekatena/test/bartlettsphericity.rb +45 -0
  97. data/lib/statsample-ekatena/test/chisquare.rb +73 -0
  98. data/lib/statsample-ekatena/test/f.rb +52 -0
  99. data/lib/statsample-ekatena/test/kolmogorovsmirnov.rb +63 -0
  100. data/lib/statsample-ekatena/test/levene.rb +88 -0
  101. data/lib/statsample-ekatena/test/t.rb +309 -0
  102. data/lib/statsample-ekatena/test/umannwhitney.rb +208 -0
  103. data/lib/statsample-ekatena/test/wilcoxonsignedrank.rb +90 -0
  104. data/lib/statsample-ekatena/vector.rb +19 -0
  105. data/lib/statsample-ekatena/version.rb +3 -0
  106. data/lib/statsample.rb +282 -0
  107. data/po/es/statsample.mo +0 -0
  108. data/po/es/statsample.po +959 -0
  109. data/po/statsample.pot +947 -0
  110. data/references.txt +24 -0
  111. data/statsample-ekatena.gemspec +49 -0
  112. data/test/fixtures/bank2.dat +200 -0
  113. data/test/fixtures/correlation_matrix.rb +17 -0
  114. data/test/fixtures/df.csv +15 -0
  115. data/test/fixtures/hartman_23.matrix +9 -0
  116. data/test/fixtures/stock_data.csv +500 -0
  117. data/test/fixtures/tetmat_matrix.txt +5 -0
  118. data/test/fixtures/tetmat_test.txt +1001 -0
  119. data/test/helpers_tests.rb +83 -0
  120. data/test/test_analysis.rb +176 -0
  121. data/test/test_anova_contrast.rb +36 -0
  122. data/test/test_anovaoneway.rb +26 -0
  123. data/test/test_anovatwoway.rb +37 -0
  124. data/test/test_anovatwowaywithdataset.rb +47 -0
  125. data/test/test_anovawithvectors.rb +102 -0
  126. data/test/test_awesome_print_bug.rb +16 -0
  127. data/test/test_bartlettsphericity.rb +25 -0
  128. data/test/test_bivariate.rb +164 -0
  129. data/test/test_codification.rb +78 -0
  130. data/test/test_crosstab.rb +67 -0
  131. data/test/test_dominance_analysis.rb +39 -0
  132. data/test/test_factor.rb +228 -0
  133. data/test/test_factor_map.rb +38 -0
  134. data/test/test_factor_pa.rb +56 -0
  135. data/test/test_fit_model.rb +88 -0
  136. data/test/test_ggobi.rb +35 -0
  137. data/test/test_gsl.rb +15 -0
  138. data/test/test_histogram.rb +109 -0
  139. data/test/test_matrix.rb +48 -0
  140. data/test/test_multiset.rb +176 -0
  141. data/test/test_regression.rb +231 -0
  142. data/test/test_reliability.rb +223 -0
  143. data/test/test_reliability_icc.rb +198 -0
  144. data/test/test_reliability_skillscale.rb +57 -0
  145. data/test/test_resample.rb +24 -0
  146. data/test/test_srs.rb +9 -0
  147. data/test/test_statistics.rb +69 -0
  148. data/test/test_stest.rb +69 -0
  149. data/test/test_stratified.rb +17 -0
  150. data/test/test_test_f.rb +33 -0
  151. data/test/test_test_kolmogorovsmirnov.rb +34 -0
  152. data/test/test_test_t.rb +62 -0
  153. data/test/test_umannwhitney.rb +27 -0
  154. data/test/test_vector.rb +12 -0
  155. data/test/test_wilcoxonsignedrank.rb +64 -0
  156. metadata +570 -0
@@ -0,0 +1,231 @@
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
+
3
# Tests for Statsample::Regression: the simple regression constructors, the
# multiple-regression engines (RubyEngine, GslEngine, MatrixEngine) and the
# MultipleDependent model.
class StatsampleRegressionTestCase < Minitest::Test
  # Fitted values expected for the ten-case a/b/c -> y fixture shared by the
  # GSL and RubyEngine tests.
  EXPECTED_PREDICTED = [1.7857, 6.0989, 3.2433, 7.2908, 4.9667, 10.3428, 8.8158, 10.4717, 23.6639, 25.3198].freeze
  # Residuals expected for the same fixture.
  EXPECTED_RESIDUALS = [1.2142, -2.0989, 1.7566, -1.29085, 2.033, -2.3428, 0.18414, -0.47177, -3.66395, 4.6801].freeze

  context 'Example with missing data' do
    setup do
      @x = Daru::Vector.new([0.285714285714286, 0.114285714285714, 0.314285714285714, 0.2, 0.2, 0.228571428571429, 0.2, 0.4, 0.714285714285714, 0.285714285714286, 0.285714285714286, 0.228571428571429, 0.485714285714286, 0.457142857142857, 0.257142857142857, 0.228571428571429, 0.285714285714286, 0.285714285714286, 0.285714285714286, 0.142857142857143, 0.285714285714286, 0.514285714285714, 0.485714285714286, 0.228571428571429, 0.285714285714286, 0.342857142857143, 0.285714285714286, 0.0857142857142857])

      @y = Daru::Vector.new([nil, 0.233333333333333, nil, 0.266666666666667, 0.366666666666667, nil, 0.333333333333333, 0.3, 0.666666666666667, 0.0333333333333333, 0.333333333333333, nil, nil, 0.533333333333333, 0.433333333333333, 0.4, 0.4, 0.5, 0.4, 0.266666666666667, 0.166666666666667, 0.666666666666667, 0.433333333333333, 0.166666666666667, nil, 0.4, 0.366666666666667, nil])
      @ds = Daru::DataFrame.new({ :x => @x, :y => @y })
      @lr = Statsample::Regression::Multiple::RubyEngine.new(@ds, :y)
    end

    should 'have correct values' do
      assert_in_delta(0.455, @lr.r2, 0.001)
      assert_in_delta(0.427, @lr.r2_adjusted, 0.001)
      assert_in_delta(0.1165, @lr.se_estimate, 0.001)
      assert_in_delta(15.925, @lr.f, 0.0001)
      assert_in_delta(0.675, @lr.standarized_coeffs[:x], 0.001)
      assert_in_delta(0.778, @lr.coeffs[:x], 0.001, 'coeff x')
      assert_in_delta(0.132, @lr.constant, 0.001, 'constant')
      assert_in_delta(0.195, @lr.coeffs_se[:x], 0.001, 'coeff x se')
      assert_in_delta(0.064, @lr.constant_se, 0.001, 'constant se')
    end
  end

  should 'return an error if data is linearly dependent' do
    samples = 100

    a, b = rand, rand

    x1 = Daru::Vector.new(samples.times.map { rand })
    x2 = Daru::Vector.new(samples.times.map { rand })
    # x3 is an exact linear combination of x1 and x2, so the predictors are
    # linearly dependent by construction.
    x3 = Daru::Vector.new(samples.times.map { |i| x1[i] * (1 + a) + x2[i] * (1 + b) })
    y = Daru::Vector.new(samples.times.map { |i| x1[i] + x2[i] + x3[i] + rand })

    ds = Daru::DataFrame.new({ :x1 => x1, :x2 => x2, :x3 => x3, :y => y })

    # assert_raises is the native Minitest assertion (also used by the
    # reliability tests), so no assert_raise alias is required.
    assert_raises(Statsample::Regression::LinearDependency) do
      Statsample::Regression::Multiple::RubyEngine.new(ds, :y)
    end
  end

  # The three ways of building a simple regression must agree.
  def test_parameters
    x = Daru::Vector.new([13, 20, 10, 33, 15])
    y = Daru::Vector.new([23, 18, 35, 10, 27])
    ds = Daru::DataFrame.new({ :x => x, :y => y })

    _test_simple_regression(Statsample::Regression::Simple.new_from_vectors(x, y))
    _test_simple_regression(Statsample::Regression::Simple.new_from_dataset(ds, :x, :y))
    _test_simple_regression(Statsample::Regression.simple(x, y))
  end

  # Shared expectations for the simple regression built in test_parameters.
  def _test_simple_regression(reg)
    assert_in_delta(40.009, reg.a, 0.001)
    assert_in_delta(-0.957, reg.b, 0.001)
    assert_in_delta(4.248, reg.standard_error, 0.002)
    assert(reg.summary)
  end

  # Smoke test: a summary can be produced for random data.
  def test_summaries
    a = Daru::Vector.new(10.times.map { rand(100) })
    b = Daru::Vector.new(10.times.map { rand(100) })
    y = Daru::Vector.new(10.times.map { rand(100) })
    ds = Daru::DataFrame.new({ :a => a, :b => b, :y => y })
    lr = Statsample::Regression::Multiple::RubyEngine.new(ds, :y)
    assert(lr.summary.size > 0)
  end

  def test_multiple_dependent
    complete = Matrix[
      [1, 0.53, 0.62, 0.19, -0.09, 0.08, 0.02, -0.12, 0.08],
      [0.53, 1, 0.61, 0.23, 0.1, 0.18, 0.02, -0.1, 0.15],
      [0.62, 0.61, 1, 0.03, 0.1, 0.12, 0.03, -0.06, 0.12],
      [0.19, 0.23, 0.03, 1, -0.02, 0.02, 0, -0.02, -0.02],
      [-0.09, 0.1, 0.1, -0.02, 1, 0.05, 0.06, 0.18, 0.02],
      [0.08, 0.18, 0.12, 0.02, 0.05, 1, 0.22, -0.07, 0.36],
      [0.02, 0.02, 0.03, 0, 0.06, 0.22, 1, -0.01, -0.05],
      [-0.12, -0.1, -0.06, -0.02, 0.18, -0.07, -0.01, 1, -0.03],
      [0.08, 0.15, 0.12, -0.02, 0.02, 0.36, -0.05, -0.03, 1]]
    complete.extend Statsample::CovariateMatrix
    complete.fields = %w(adhd cd odd sex age monly mwork mage poverty)

    lr = Statsample::Regression::Multiple::MultipleDependent.new(complete, %w(adhd cd odd))

    assert_in_delta(0.197, lr.r2yx, 0.001)
    assert_in_delta(0.197, lr.r2yx_covariance, 0.001)
    assert_in_delta(0.07, lr.p2yx, 0.001)
  end

  # RubyEngine on data with nil values scattered across every variable.
  def test_multiple_regression_pairwise_2
    a = Daru::Vector.new([1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 3, nil, 3, nil, 3])
    b = Daru::Vector.new([3, 3, 4, 4, 5, 5, 6, 6, 4, 4, 2, 2, nil, 6, 2])
    c = Daru::Vector.new([11, 22, 30, 40, 50, 65, 78, 79, 99, 100, nil, 3, 7, nil, 7])
    y = Daru::Vector.new([3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 30, 40, nil, 50, nil])
    ds = Daru::DataFrame.new({ :a => a, :b => b, :c => c, :y => y })
    lr = Statsample::Regression::Multiple::RubyEngine.new(ds, :y)
    assert_in_delta(2407.436, lr.sst, 0.001)
    assert_in_delta(0.752, lr.r, 0.001, 'pairwise r')
    assert_in_delta(0.565, lr.r2, 0.001)
    assert_in_delta(1361.130, lr.ssr, 0.001)
    assert_in_delta(1046.306, lr.sse, 0.001)
    assert_in_delta(3.035, lr.f, 0.001)
  end

  def test_multiple_regression_gsl
    # Guard clause: skipped entirely when Statsample reports no GSL.
    skip 'Regression::Multiple::GslEngine not tested (no Gsl)' unless Statsample.has_gsl?

    a = Daru::Vector.new([1, 3, 2, 4, 3, 5, 4, 6, 5, 7])
    b = Daru::Vector.new([3, 3, 4, 4, 5, 5, 6, 6, 4, 4])
    c = Daru::Vector.new([11, 22, 30, 40, 50, 65, 78, 79, 99, 100])
    y = Daru::Vector.new([3, 4, 5, 6, 7, 8, 9, 10, 20, 30])
    ds = Daru::DataFrame.new({ :a => a, :b => b, :c => c, :y => y })
    lr = Statsample::Regression::Multiple::GslEngine.new(ds, :y)
    assert(lr.summary.size > 0)
    model_test(lr, 'gsl')
    assert_series_in_delta(EXPECTED_PREDICTED, lr.predicted)
    assert_series_in_delta(EXPECTED_RESIDUALS, lr.residuals)
  end

  # Expectations shared by every engine fitted on the a/b/c -> y fixture,
  # restricted to what the matrix-based engine can also provide.
  #
  # lr   - the fitted multiple-regression object under test.
  # name - label interpolated into failure messages.
  def model_test_matrix(lr, name = 'undefined')
    stan_coeffs = { :a => 0.151, :b => -0.547, :c => 0.997 }
    unstan_coeffs = { :a => 0.695, :b => -4.286, :c => 0.266 }

    unstan_coeffs.each do |key, expected|
      assert_in_delta(expected, lr.coeffs[key], 0.001, "b coeffs - #{name}")
    end

    stan_coeffs.each do |key, expected|
      assert_in_delta(expected, lr.standarized_coeffs[key], 0.001, "beta coeffs - #{name}")
    end

    assert_in_delta(11.027, lr.constant, 0.001)

    assert_in_delta(0.955, lr.r, 0.001)
    assert_in_delta(0.913, lr.r2, 0.001)

    assert_in_delta(20.908, lr.f, 0.001)
    assert_in_delta(0.001, lr.probability, 0.001)
    assert_in_delta(0.226, lr.tolerance(:a), 0.001)

    coeffs_se = { :a => 1.171, :b => 1.129, :c => 0.072 }
    ccoeffs_se = lr.coeffs_se
    coeffs_se.each do |key, expected|
      assert_in_delta(expected, ccoeffs_se[key], 0.001)
    end

    coeffs_t = { :a => 0.594, :b => -3.796, :c => 3.703 }
    ccoeffs_t = lr.coeffs_t
    coeffs_t.each do |key, expected|
      assert_in_delta(expected, ccoeffs_t[key], 0.001)
    end

    assert_in_delta(639.6, lr.sst, 0.001)
    assert_in_delta(583.76, lr.ssr, 0.001)
    assert_in_delta(55.840, lr.sse, 0.001)
    assert(lr.summary.size > 0, "#{name} without summary")
  end

  # model_test_matrix plus the constant's standard error / t value and a
  # single prediction through #process.
  def model_test(lr, name = 'undefined')
    model_test_matrix(lr, name)
    assert_in_delta(4.559, lr.constant_se, 0.001)
    assert_in_delta(2.419, lr.constant_t, 0.001)

    assert_in_delta(1.785, lr.process([1, 3, 11]), 0.001)
  end

  # MatrixEngine fed first with a correlation matrix, then a covariance matrix.
  def test_regression_matrix
    a = Daru::Vector.new([1, 3, 2, 4, 3, 5, 4, 6, 5, 7])
    b = Daru::Vector.new([3, 3, 4, 4, 5, 5, 6, 6, 4, 4])
    c = Daru::Vector.new([11, 22, 30, 40, 50, 65, 78, 79, 99, 100])
    y = Daru::Vector.new([3, 4, 5, 6, 7, 8, 9, 10, 20, 30])
    ds = Daru::DataFrame.new({ :a => a, :b => b, :c => c, :y => y })
    x_mean = { :a => ds[:a].mean, :b => ds[:b].mean, :c => ds[:c].mean }

    cor = Statsample::Bivariate.correlation_matrix(ds)
    lr = Statsample::Regression::Multiple::MatrixEngine.new(
      cor, :y, y_mean: y.mean,
      x_mean: x_mean,
      cases: a.size, y_sd: y.sd, x_sd: { :a => a.sd, :b => b.sd, :c => c.sd })
    # With a correlation matrix the constant's SE and t are expected to be nil.
    assert_nil(lr.constant_se)
    assert_nil(lr.constant_t)
    model_test_matrix(lr, 'correlation matrix')

    covariance = Statsample::Bivariate.covariance_matrix(ds)
    lr = Statsample::Regression::Multiple::MatrixEngine.new(
      covariance, :y, y_mean: y.mean,
      x_mean: x_mean, cases: a.size)
    assert(lr.summary.size > 0)

    model_test(lr, 'covariance matrix')
  end

  # RubyEngine on the shared fixture with one all-nil leading case.
  def test_regression_rubyengine
    a = Daru::Vector.new([nil, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7])
    b = Daru::Vector.new([nil, 3, 3, 4, 4, 5, 5, 6, 6, 4, 4])
    c = Daru::Vector.new([nil, 11, 22, 30, 40, 50, 65, 78, 79, 99, 100])
    y = Daru::Vector.new([nil, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30])
    ds = Daru::DataFrame.new({ :a => a, :b => b, :c => c, :y => y })
    lr = Statsample::Regression::Multiple::RubyEngine.new(ds, :y)
    assert_equal(11, lr.total_cases)
    assert_equal(10, lr.valid_cases)
    model_test(lr, 'rubyengine with missing data')

    # The leading nil mirrors the all-nil first case of the input vectors.
    assert_series_in_delta([nil] + EXPECTED_PREDICTED, lr.predicted)
    assert_series_in_delta([nil] + EXPECTED_RESIDUALS, lr.residuals)
  end

  private

  # Compares two indexable series element by element.
  #
  # expected - Array of expected values; nil marks a position that must be
  #            nil in +actual+ as well.
  # actual   - object indexable by integer position (e.g. lr.predicted).
  # delta    - tolerance passed to assert_in_delta.
  def assert_series_in_delta(expected, actual, delta = 0.001)
    expected.each_with_index do |value, i|
      if actual[i].nil?
        assert_nil(value, "Actual #{i} is nil, but expected #{value}")
      else
        assert_in_delta(value, actual[i], delta)
      end
    end
  end
end
@@ -0,0 +1,223 @@
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
# Tests for Statsample::Reliability: Spearman-Brown helpers, Cronbach's alpha,
# ItemCharacteristicCurve, MultiScaleAnalysis and ScaleAnalysis.
class StatsampleReliabilityTestCase < Minitest::Test
  context Statsample::Reliability do
    should 'return correct r according to Spearman-Brown prophecy' do
      r = 0.6849
      n = 62.quo(15)
      assert_in_delta(0.9, Statsample::Reliability.sbp(r, n), 0.001)
    end

    should 'return correct n for desired realiability' do
      r = 0.6849
      r_d = 0.9
      assert_in_delta(62, Statsample::Reliability.n_for_desired_reliability(r, r_d, 15), 0.5)
    end

    context "Cronbach's alpha" do
      setup do
        @samples = 40
        @n_variables = rand(10) + 2
        @ds = Daru::DataFrame.new({}, index: @samples)
        # Every item is the same base vector plus fresh random noise.
        base = Daru::Vector.new(@samples.times.map { rand })
        @n_variables.times do |i|
          @ds[i] = Daru::Vector.new(base.collect { |v| v + rand })
        end

        @k = @ds.ncols
        @cm = Statsample::Bivariate.covariance_matrix(@ds)
        # Standardized copy of the dataset, used for the standarized alpha.
        @dse = @ds.dup
        @dse.vectors.each do |f|
          @dse[f] = @dse[f].standardize
        end
        @cme = Statsample::Bivariate.covariance_matrix(@dse)
        @a = Statsample::Reliability.cronbach_alpha(@ds)
        @as = Statsample::Reliability.cronbach_alpha_standarized(@ds)
      end

      should 'alpha will be equal to sum of matrix covariance less the individual variances' do
        total_sum = @cm.total_sum
        ind_var = @ds.vectors.to_a.inject(0) { |ac, v| ac + @ds[v].variance }
        expected = @k.quo(@k - 1) * (1 - ind_var.quo(total_sum))
        assert_in_delta(expected, @a, 1e-10)
      end

      should 'method cronbach_alpha_from_n_s2_cov return correct values' do
        sa = Statsample::Reliability::ScaleAnalysis.new(@ds)
        vm, cm = sa.variances_mean, sa.covariances_mean
        assert_in_delta(sa.alpha, Statsample::Reliability.cronbach_alpha_from_n_s2_cov(@n_variables, vm, cm), 1e-10)
      end

      should 'method cronbach_alpha_from_covariance_matrix returns correct value' do
        cov = Statsample::Bivariate.covariance_matrix(@ds)
        assert_in_delta(@a, Statsample::Reliability.cronbach_alpha_from_covariance_matrix(cov), 0.0000001)
      end

      should 'return correct n for desired alpha, covariance and variance' do
        sa = Statsample::Reliability::ScaleAnalysis.new(@ds)
        vm, cm = sa.variances_mean, sa.covariances_mean
        n_obtained = Statsample::Reliability.n_for_desired_alpha(@a, vm, cm)
        # Round trip: the n obtained for alpha must reproduce alpha.
        assert_in_delta(Statsample::Reliability.cronbach_alpha_from_n_s2_cov(n_obtained, vm, cm), @a, 0.001)
      end

      should 'standarized alpha will be equal to sum of matrix covariance less the individual variances on standarized values' do
        total_sum = @cme.total_sum
        ind_var = @dse.vectors.to_a.inject(0) { |ac, v| ac + @dse[v].variance }
        expected = @k.quo(@k - 1) * (1 - ind_var.quo(total_sum))
        assert_in_delta(expected, @as, 1e-10)
      end
    end

    context Statsample::Reliability::ItemCharacteristicCurve do
      setup do
        @samples = 100
        @points = rand(10) + 3
        # Highest possible total: three items, each at most @points - 1.
        @max_point = (@points - 1) * 3
        @x1 = Daru::Vector.new(@samples.times.map { rand(@points) })
        @x2 = Daru::Vector.new(@samples.times.map { rand(@points) })
        @x3 = Daru::Vector.new(@samples.times.map { rand(@points) })
        @ds = Daru::DataFrame.new({ :a => @x1, :b => @x2, :c => @x3 })
        @icc = Statsample::Reliability::ItemCharacteristicCurve.new(@ds)
      end

      should 'have a correct automatic vector_total' do
        assert_equal(@ds.vector_sum, @icc.vector_total)
      end

      should 'have a correct different vector_total' do
        x2 = Daru::Vector.new(@samples.times.map { rand(10) })
        @icc = Statsample::Reliability::ItemCharacteristicCurve.new(@ds, x2)
        assert_equal(x2, @icc.vector_total)
        # A total vector longer than the dataset must be rejected.
        assert_raises(ArgumentError) do
          inc = Daru::Vector.new((@samples + 10).times.map { rand(10) })
          @icc = Statsample::Reliability::ItemCharacteristicCurve.new(@ds, inc)
        end
      end

      should 'have 0% for 0 points on maximum value values' do
        max = @icc.curve_field(:a, 0)[@max_point.to_f]
        max ||= 0
        assert_in_delta(0, max)
      end

      should 'have 0 for max value on minimum value' do
        max = @icc.curve_field(:a, @max_point)[0.0]
        max ||= 0
        assert_in_delta(0, max)
      end

      should 'have correct values of % for any value' do
        totals = @icc.vector_total
        frequencies = totals.frequencies
        index = rand(@points)

        # Per total score, count the cases whose item :a answer equals +index+.
        # Every observed total gets a key, even when the count stays at 0.
        hits = {}
        @x1.each_with_index do |v, i|
          hits[totals[i]] ||= 0
          hits[totals[i]] += 1 if v == index
        end

        # Build the expectation in a separate hash rather than rewriting
        # +hits+ while iterating over it.
        expected = hits.each_with_object({}) do |(score, count), acc|
          acc[score] = count.quo(frequencies[score])
        end
        assert_equal(expected, @icc.curve_field(:a, index))
      end
    end

    context Statsample::Reliability::MultiScaleAnalysis do
      setup do
        size = 100
        @scales = 3
        @items_per_scale = 10
        h = {}
        @scales.times do |s|
          scale_item_names(s).each do |item|
            h[item] = Daru::Vector.new(size.times.map { (s * 2) + rand })
          end
        end
        @ds = Daru::DataFrame.new(h)
        @msa = Statsample::Reliability::MultiScaleAnalysis.new(name: 'Multiple Analysis') do |m|
          m.scale 'complete', @ds
          @scales.times do |s|
            m.scale "scale_#{s}", @ds.clone(*scale_item_names(s)), name: "Scale #{s}"
          end
        end
      end

      should 'Retrieve correct ScaleAnalysis for whole scale' do
        sa = Statsample::Reliability::ScaleAnalysis.new(@ds, name: 'Scale complete')
        assert_equal(sa.variances_mean, @msa.scale('complete').variances_mean)
      end

      should 'Retrieve correct ScaleAnalysis for each scale' do
        @scales.times do |s|
          sa = Statsample::Reliability::ScaleAnalysis.new(@ds.dup(scale_item_names(s)), name: "Scale #{s}")
          assert_equal(sa.variances_mean, @msa.scale("scale_#{s}").variances_mean)
        end
      end

      should 'retrieve correct correlation matrix for each scale' do
        vectors = { :complete => @ds.vector_sum }
        @scales.times do |s|
          vectors["scale_#{s}".to_sym] = @ds.dup(scale_item_names(s)).vector_sum
        end
        ds2 = Daru::DataFrame.new(vectors)
        assert_equal(Statsample::Bivariate.correlation_matrix(ds2), @msa.correlation_matrix)
      end

      should 'delete scale using delete_scale' do
        @msa.delete_scale('complete')
        # Expected value first, so a failure message labels the two correctly.
        assert_equal(@scales.times.map { |s| "scale_#{s}" }, @msa.scales.keys.sort)
      end

      should 'retrieve pca for scales' do
        @msa.delete_scale('complete')
        vectors = {}
        @scales.times do |s|
          vectors["scale_#{s}".to_sym] = @ds.dup(scale_item_names(s)).vector_sum
        end
        ds2 = Daru::DataFrame.new(vectors)
        cor_matrix = Statsample::Bivariate.correlation_matrix(ds2)
        m = 3
        pca = Statsample::Factor::PCA.new(cor_matrix, m: m)
        assert_equal(pca.component_matrix, @msa.pca(m: m).component_matrix)
      end

      should 'retrieve acceptable summary' do
        # Only the 'complete' scale is left before asking for a summary.
        @msa.delete_scale('scale_0')
        @msa.delete_scale('scale_1')
        @msa.delete_scale('scale_2')

        # @msa.summary_correlation_matrix=true
        # @msa.summary_pca=true

        assert(@msa.summary.size > 0)
      end
    end

    context Statsample::Reliability::ScaleAnalysis do
      setup do
        @x1 = Daru::Vector.new([1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 30])
        @x2 = Daru::Vector.new([1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 50])
        @x3 = Daru::Vector.new([2, 2, 1, 1, 1, 2, 2, 2, 3, 4, 5, 40])
        @x4 = Daru::Vector.new([1, 2, 3, 4, 4, 4, 4, 3, 4, 4, 5, 30])
        @ds = Daru::DataFrame.new({ :x1 => @x1, :x2 => @x2, :x3 => @x3, :x4 => @x4 })
        @ia = Statsample::Reliability::ScaleAnalysis.new(@ds)
        @cov_matrix = @ia.cov_m
      end

      should 'return correct values for item analysis' do
        assert_in_delta(0.980, @ia.alpha, 0.001)
        assert_in_delta(0.999, @ia.alpha_standarized, 0.001)
        # Mean of the diagonal of the covariance matrix.
        var_mean = Daru::Vector.new(4.times.map { |m| @cov_matrix[m, m] }).mean
        assert_in_delta(var_mean, @ia.variances_mean)
        assert_equal(@x1.mean, @ia.item_statistics[:x1][:mean])
        assert_equal(@x4.mean, @ia.item_statistics[:x4][:mean])
        assert_in_delta(@x1.sds, @ia.item_statistics[:x1][:sds], 1e-14)
        assert_in_delta(@x4.sds, @ia.item_statistics[:x4][:sds], 1e-14)

        # Recompute the "if deleted" statistics for :x1 by hand.
        ds2 = @ds.clone
        ds2.delete_vector(:x1)
        vector_sum = ds2.vector_sum
        assert_equal(vector_sum.mean, @ia.stats_if_deleted[:x1][:mean])
        assert_equal(vector_sum.sds, @ia.stats_if_deleted[:x1][:sds])
        assert_in_delta(vector_sum.variance, @ia.stats_if_deleted[:x1][:variance_sample], 1e-10)

        assert_equal(Statsample::Reliability.cronbach_alpha(ds2), @ia.stats_if_deleted[:x1][:alpha])

        # Every off-diagonal cell (i != j) of the 4x4 covariance matrix.
        covariances = (0...4).to_a.permutation(2).map { |i, j| @cov_matrix[i, j] }
        assert_in_delta(Daru::Vector.new(covariances).mean, @ia.covariances_mean)
        assert_in_delta(0.999, @ia.item_total_correlation[:x1], 0.001)
        assert_in_delta(1050.455, @ia.stats_if_deleted[:x1][:variance_sample], 0.001)
      end

      should 'return a summary' do
        assert(@ia.summary.size > 0)
      end
    end
  end

  private

  # Symbol names ("<scale>_<item>") of the items belonging to one scale of the
  # MultiScaleAnalysis fixture; relies on @items_per_scale set in its setup.
  def scale_item_names(scale)
    @items_per_scale.times.map { |i| "#{scale}_#{i}".to_sym }
  end
end