statsample-ekatena 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.travis.yml +23 -0
  4. data/CONTRIBUTING.md +17 -0
  5. data/Gemfile +2 -0
  6. data/History.txt +457 -0
  7. data/LICENSE.txt +12 -0
  8. data/README.md +175 -0
  9. data/Rakefile +44 -0
  10. data/benchmarks/correlation_matrix_15_variables.rb +32 -0
  11. data/benchmarks/correlation_matrix_5_variables.rb +33 -0
  12. data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
  13. data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
  14. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +71 -0
  15. data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
  16. data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
  17. data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
  18. data/benchmarks/correlation_matrix_methods/results.ds +0 -0
  19. data/benchmarks/factor_map.rb +37 -0
  20. data/benchmarks/helpers_benchmark.rb +5 -0
  21. data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
  22. data/doc_latex/manual/equations.tex +78 -0
  23. data/examples/boxplot.rb +28 -0
  24. data/examples/chisquare_test.rb +23 -0
  25. data/examples/correlation_matrix.rb +32 -0
  26. data/examples/dataset.rb +30 -0
  27. data/examples/dominance_analysis.rb +33 -0
  28. data/examples/dominance_analysis_bootstrap.rb +32 -0
  29. data/examples/histogram.rb +26 -0
  30. data/examples/icc.rb +24 -0
  31. data/examples/levene.rb +29 -0
  32. data/examples/multiple_regression.rb +20 -0
  33. data/examples/multivariate_correlation.rb +33 -0
  34. data/examples/parallel_analysis.rb +40 -0
  35. data/examples/polychoric.rb +40 -0
  36. data/examples/principal_axis.rb +26 -0
  37. data/examples/reliability.rb +31 -0
  38. data/examples/scatterplot.rb +25 -0
  39. data/examples/t_test.rb +27 -0
  40. data/examples/tetrachoric.rb +17 -0
  41. data/examples/u_test.rb +24 -0
  42. data/examples/vector.rb +20 -0
  43. data/examples/velicer_map_test.rb +46 -0
  44. data/grab_references.rb +29 -0
  45. data/lib/spss.rb +134 -0
  46. data/lib/statsample-ekatena/analysis.rb +100 -0
  47. data/lib/statsample-ekatena/analysis/suite.rb +89 -0
  48. data/lib/statsample-ekatena/analysis/suitereportbuilder.rb +44 -0
  49. data/lib/statsample-ekatena/anova.rb +24 -0
  50. data/lib/statsample-ekatena/anova/contrast.rb +79 -0
  51. data/lib/statsample-ekatena/anova/oneway.rb +187 -0
  52. data/lib/statsample-ekatena/anova/twoway.rb +207 -0
  53. data/lib/statsample-ekatena/bivariate.rb +406 -0
  54. data/lib/statsample-ekatena/bivariate/pearson.rb +54 -0
  55. data/lib/statsample-ekatena/codification.rb +182 -0
  56. data/lib/statsample-ekatena/converter/csv.rb +28 -0
  57. data/lib/statsample-ekatena/converter/spss.rb +48 -0
  58. data/lib/statsample-ekatena/converters.rb +211 -0
  59. data/lib/statsample-ekatena/crosstab.rb +188 -0
  60. data/lib/statsample-ekatena/daru.rb +115 -0
  61. data/lib/statsample-ekatena/dataset.rb +10 -0
  62. data/lib/statsample-ekatena/dominanceanalysis.rb +425 -0
  63. data/lib/statsample-ekatena/dominanceanalysis/bootstrap.rb +232 -0
  64. data/lib/statsample-ekatena/factor.rb +104 -0
  65. data/lib/statsample-ekatena/factor/map.rb +124 -0
  66. data/lib/statsample-ekatena/factor/parallelanalysis.rb +166 -0
  67. data/lib/statsample-ekatena/factor/pca.rb +242 -0
  68. data/lib/statsample-ekatena/factor/principalaxis.rb +243 -0
  69. data/lib/statsample-ekatena/factor/rotation.rb +198 -0
  70. data/lib/statsample-ekatena/formula/fit_model.rb +46 -0
  71. data/lib/statsample-ekatena/formula/formula.rb +306 -0
  72. data/lib/statsample-ekatena/graph.rb +11 -0
  73. data/lib/statsample-ekatena/graph/boxplot.rb +236 -0
  74. data/lib/statsample-ekatena/graph/histogram.rb +198 -0
  75. data/lib/statsample-ekatena/graph/scatterplot.rb +213 -0
  76. data/lib/statsample-ekatena/histogram.rb +180 -0
  77. data/lib/statsample-ekatena/matrix.rb +329 -0
  78. data/lib/statsample-ekatena/multiset.rb +310 -0
  79. data/lib/statsample-ekatena/regression.rb +65 -0
  80. data/lib/statsample-ekatena/regression/multiple.rb +89 -0
  81. data/lib/statsample-ekatena/regression/multiple/alglibengine.rb +128 -0
  82. data/lib/statsample-ekatena/regression/multiple/baseengine.rb +251 -0
  83. data/lib/statsample-ekatena/regression/multiple/gslengine.rb +129 -0
  84. data/lib/statsample-ekatena/regression/multiple/matrixengine.rb +205 -0
  85. data/lib/statsample-ekatena/regression/multiple/rubyengine.rb +86 -0
  86. data/lib/statsample-ekatena/regression/simple.rb +121 -0
  87. data/lib/statsample-ekatena/reliability.rb +150 -0
  88. data/lib/statsample-ekatena/reliability/icc.rb +415 -0
  89. data/lib/statsample-ekatena/reliability/multiscaleanalysis.rb +181 -0
  90. data/lib/statsample-ekatena/reliability/scaleanalysis.rb +233 -0
  91. data/lib/statsample-ekatena/reliability/skillscaleanalysis.rb +114 -0
  92. data/lib/statsample-ekatena/resample.rb +15 -0
  93. data/lib/statsample-ekatena/shorthand.rb +125 -0
  94. data/lib/statsample-ekatena/srs.rb +169 -0
  95. data/lib/statsample-ekatena/test.rb +82 -0
  96. data/lib/statsample-ekatena/test/bartlettsphericity.rb +45 -0
  97. data/lib/statsample-ekatena/test/chisquare.rb +73 -0
  98. data/lib/statsample-ekatena/test/f.rb +52 -0
  99. data/lib/statsample-ekatena/test/kolmogorovsmirnov.rb +63 -0
  100. data/lib/statsample-ekatena/test/levene.rb +88 -0
  101. data/lib/statsample-ekatena/test/t.rb +309 -0
  102. data/lib/statsample-ekatena/test/umannwhitney.rb +208 -0
  103. data/lib/statsample-ekatena/test/wilcoxonsignedrank.rb +90 -0
  104. data/lib/statsample-ekatena/vector.rb +19 -0
  105. data/lib/statsample-ekatena/version.rb +3 -0
  106. data/lib/statsample.rb +282 -0
  107. data/po/es/statsample.mo +0 -0
  108. data/po/es/statsample.po +959 -0
  109. data/po/statsample.pot +947 -0
  110. data/references.txt +24 -0
  111. data/statsample-ekatena.gemspec +49 -0
  112. data/test/fixtures/bank2.dat +200 -0
  113. data/test/fixtures/correlation_matrix.rb +17 -0
  114. data/test/fixtures/df.csv +15 -0
  115. data/test/fixtures/hartman_23.matrix +9 -0
  116. data/test/fixtures/stock_data.csv +500 -0
  117. data/test/fixtures/tetmat_matrix.txt +5 -0
  118. data/test/fixtures/tetmat_test.txt +1001 -0
  119. data/test/helpers_tests.rb +83 -0
  120. data/test/test_analysis.rb +176 -0
  121. data/test/test_anova_contrast.rb +36 -0
  122. data/test/test_anovaoneway.rb +26 -0
  123. data/test/test_anovatwoway.rb +37 -0
  124. data/test/test_anovatwowaywithdataset.rb +47 -0
  125. data/test/test_anovawithvectors.rb +102 -0
  126. data/test/test_awesome_print_bug.rb +16 -0
  127. data/test/test_bartlettsphericity.rb +25 -0
  128. data/test/test_bivariate.rb +164 -0
  129. data/test/test_codification.rb +78 -0
  130. data/test/test_crosstab.rb +67 -0
  131. data/test/test_dominance_analysis.rb +39 -0
  132. data/test/test_factor.rb +228 -0
  133. data/test/test_factor_map.rb +38 -0
  134. data/test/test_factor_pa.rb +56 -0
  135. data/test/test_fit_model.rb +88 -0
  136. data/test/test_ggobi.rb +35 -0
  137. data/test/test_gsl.rb +15 -0
  138. data/test/test_histogram.rb +109 -0
  139. data/test/test_matrix.rb +48 -0
  140. data/test/test_multiset.rb +176 -0
  141. data/test/test_regression.rb +231 -0
  142. data/test/test_reliability.rb +223 -0
  143. data/test/test_reliability_icc.rb +198 -0
  144. data/test/test_reliability_skillscale.rb +57 -0
  145. data/test/test_resample.rb +24 -0
  146. data/test/test_srs.rb +9 -0
  147. data/test/test_statistics.rb +69 -0
  148. data/test/test_stest.rb +69 -0
  149. data/test/test_stratified.rb +17 -0
  150. data/test/test_test_f.rb +33 -0
  151. data/test/test_test_kolmogorovsmirnov.rb +34 -0
  152. data/test/test_test_t.rb +62 -0
  153. data/test/test_umannwhitney.rb +27 -0
  154. data/test/test_vector.rb +12 -0
  155. data/test/test_wilcoxonsignedrank.rb +64 -0
  156. metadata +570 -0
@@ -0,0 +1,231 @@
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
+
3
+ class StatsampleRegressionTestCase < Minitest::Test
4
# Simple regression of :y on :x where the response vector contains nil entries.
# The engine is built once in setup and its statistics are compared to pinned constants.
+ context 'Example with missing data' do
5
+ setup do
6
+ @x = Daru::Vector.new([0.285714285714286, 0.114285714285714, 0.314285714285714, 0.2, 0.2, 0.228571428571429, 0.2, 0.4, 0.714285714285714, 0.285714285714286, 0.285714285714286, 0.228571428571429, 0.485714285714286, 0.457142857142857, 0.257142857142857, 0.228571428571429, 0.285714285714286, 0.285714285714286, 0.285714285714286, 0.142857142857143, 0.285714285714286, 0.514285714285714, 0.485714285714286, 0.228571428571429, 0.285714285714286, 0.342857142857143, 0.285714285714286, 0.0857142857142857])
7
+
# @y has the same length as @x but several positions are nil (missing responses).
8
+ @y = Daru::Vector.new([nil, 0.233333333333333, nil, 0.266666666666667, 0.366666666666667, nil, 0.333333333333333, 0.3, 0.666666666666667, 0.0333333333333333, 0.333333333333333, nil, nil, 0.533333333333333, 0.433333333333333, 0.4, 0.4, 0.5, 0.4, 0.266666666666667, 0.166666666666667, 0.666666666666667, 0.433333333333333, 0.166666666666667, nil, 0.4, 0.366666666666667, nil])
9
+ @ds = Daru::DataFrame.new({ :x => @x, :y => @y })
10
+ @lr = Statsample::Regression::Multiple::RubyEngine.new(@ds, :y)
11
+ end
# Each expectation is a literal compared with assert_in_delta; the last argument
# of the four-argument calls is the failure message.
12
+ should 'have correct values' do
13
+ assert_in_delta(0.455, @lr.r2, 0.001)
14
+ assert_in_delta(0.427, @lr.r2_adjusted, 0.001)
15
+ assert_in_delta(0.1165, @lr.se_estimate, 0.001)
16
+ assert_in_delta(15.925, @lr.f, 0.0001)
17
+ assert_in_delta(0.675, @lr.standarized_coeffs[:x], 0.001)
18
+ assert_in_delta(0.778, @lr.coeffs[:x], 0.001, 'coeff x')
19
+ assert_in_delta(0.132, @lr.constant, 0.001, 'constant')
20
+ assert_in_delta(0.195, @lr.coeffs_se[:x], 0.001, 'coeff x se')
21
+ assert_in_delta(0.064, @lr.constant_se, 0.001, 'constant se')
22
+ end
23
+ end
24
# Building a RubyEngine on predictors that are exactly linearly dependent must raise
# Statsample::Regression::LinearDependency.
# Uses Minitest's own assert_raises (as the ItemCharacteristicCurve test in this file does)
# rather than the test-unit style assert_raise spelling.
+ should 'return an error if data is linearly dependent' do
25
+ samples = 100
26
+
27
+ a, b = rand, rand
28
+
29
+ x1 = Daru::Vector.new(samples.times.map { rand })
30
+ x2 = Daru::Vector.new(samples.times.map { rand })
# x3 is an exact linear combination of x1 and x2, which is what makes the design dependent.
31
+ x3 = Daru::Vector.new(samples.times.map { |i| x1[i] * (1 + a) + x2[i] * (1 + b) })
32
+ y = Daru::Vector.new(samples.times.map { |i| x1[i] + x2[i] + x3[i] + rand })
33
+
34
+ ds = Daru::DataFrame.new({ :x1 => x1, :x2 => x2, :x3 => x3, :y => y })
35
+
36
+ assert_raises(Statsample::Regression::LinearDependency) {
37
+ Statsample::Regression::Multiple::RubyEngine.new(ds, :y)
38
+ }
39
+ end
40
# Builds the same simple regression through three entry points (new_from_vectors,
# new_from_dataset, Statsample::Regression.simple) and runs the shared checks on each.
+ def test_parameters
41
+ @x =Daru::Vector.new([13, 20, 10, 33, 15])
42
+ @y =Daru::Vector.new([23, 18, 35, 10, 27])
43
+ reg = Statsample::Regression::Simple.new_from_vectors(@x, @y)
44
+ _test_simple_regression(reg)
45
+ ds = Daru::DataFrame.new({ :x => @x, :y => @y })
46
+ reg = Statsample::Regression::Simple.new_from_dataset(ds, :x, :y)
47
+ _test_simple_regression(reg)
48
+ reg = Statsample::Regression.simple(@x, @y)
49
+ _test_simple_regression(reg)
50
+ end
51
+
52
# Shared assertions for test_parameters: checks reg.a, reg.b and reg.standard_error
# against pinned values and requires reg.summary to be truthy.
# The leading underscore keeps the name from matching the test_* pattern.
+ def _test_simple_regression(reg)
53
+ assert_in_delta(40.009, reg.a, 0.001)
54
+ assert_in_delta(-0.957, reg.b, 0.001)
55
+ assert_in_delta(4.248, reg.standard_error, 0.002)
56
+ assert(reg.summary)
57
+ end
58
+
59
# Smoke test: fits a RubyEngine on three random integer vectors (10 values each, drawn
# with rand(100)) and only requires that the summary is non-empty.
+ def test_summaries
60
+ a = Daru::Vector.new(10.times.map { rand(100) })
61
+ b = Daru::Vector.new(10.times.map { rand(100) })
62
+ y = Daru::Vector.new(10.times.map { rand(100) })
63
+ ds = Daru::DataFrame.new({ :a => a, :b => b, :y => y })
64
+ lr = Statsample::Regression::Multiple::RubyEngine.new(ds, :y)
65
+ assert(lr.summary.size > 0)
66
+ end
67
+
68
# Checks MultipleDependent built from a literal 9x9 matrix (unit diagonal) whose fields
# are named below; the second constructor argument selects adhd, cd and odd.
+ def test_multiple_dependent
69
+ complete = Matrix[
70
+ [1, 0.53, 0.62, 0.19, -0.09, 0.08, 0.02, -0.12, 0.08],
71
+ [0.53, 1, 0.61, 0.23, 0.1, 0.18, 0.02, -0.1, 0.15],
72
+ [0.62, 0.61, 1, 0.03, 0.1, 0.12, 0.03, -0.06, 0.12],
73
+ [0.19, 0.23, 0.03, 1, -0.02, 0.02, 0, -0.02, -0.02],
74
+ [-0.09, 0.1, 0.1, -0.02, 1, 0.05, 0.06, 0.18, 0.02],
75
+ [0.08, 0.18, 0.12, 0.02, 0.05, 1, 0.22, -0.07, 0.36],
76
+ [0.02, 0.02, 0.03, 0, 0.06, 0.22, 1, -0.01, -0.05],
77
+ [-0.12, -0.1, -0.06, -0.02, 0.18, -0.07, -0.01, 1, -0.03],
78
+ [0.08, 0.15, 0.12, -0.02, 0.02, 0.36, -0.05, -0.03, 1]]
# Mixing in CovariateMatrix is what provides the fields= writer used on the next line.
79
+ complete.extend Statsample::CovariateMatrix
80
+ complete.fields = %w(adhd cd odd sex age monly mwork mage poverty)
81
+
82
+ lr = Statsample::Regression::Multiple::MultipleDependent.new(complete, %w(adhd cd odd))
83
+
84
+ assert_in_delta(0.197, lr.r2yx, 0.001)
85
+ assert_in_delta(0.197, lr.r2yx_covariance, 0.001)
86
+ assert_in_delta(0.07, lr.p2yx, 0.001)
87
+ end
88
+
89
# Fits a RubyEngine on four vectors that each have nil entries at different positions
# and compares sums of squares, r, r2 and f with pinned values.
+ def test_multiple_regression_pairwise_2
90
+ @a =Daru::Vector.new( [1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 3, nil, 3, nil, 3])
91
+ @b =Daru::Vector.new( [3, 3, 4, 4, 5, 5, 6, 6, 4, 4, 2, 2, nil, 6, 2])
92
+ @c =Daru::Vector.new( [11, 22, 30, 40, 50, 65, 78, 79, 99, 100, nil, 3, 7, nil, 7])
93
+ @y =Daru::Vector.new( [3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 30, 40, nil, 50, nil])
94
+ ds = Daru::DataFrame.new({ :a => @a, :b => @b, :c => @c, :y => @y })
95
+ lr = Statsample::Regression::Multiple::RubyEngine.new(ds, :y)
96
+ assert_in_delta(2407.436, lr.sst, 0.001)
97
+ assert_in_delta(0.752, lr.r, 0.001, 'pairwise r')
98
+ assert_in_delta(0.565, lr.r2, 0.001)
99
+ assert_in_delta(1361.130, lr.ssr, 0.001)
100
+ assert_in_delta(1046.306, lr.sse, 0.001)
101
+ assert_in_delta(3.035, lr.f, 0.001)
102
+ end
103
+
104
# Runs the shared model checks against GslEngine, plus per-case predicted values and
# residuals. When Statsample.has_gsl? is false the test is skipped instead.
+ def test_multiple_regression_gsl
105
+ if Statsample.has_gsl?
106
+ @a =Daru::Vector.new( [1, 3, 2, 4, 3, 5, 4, 6, 5, 7])
107
+ @b =Daru::Vector.new( [3, 3, 4, 4, 5, 5, 6, 6, 4, 4])
108
+ @c =Daru::Vector.new( [11, 22, 30, 40, 50, 65, 78, 79, 99, 100])
109
+ @y =Daru::Vector.new( [3, 4, 5, 6, 7, 8, 9, 10, 20, 30])
110
+ ds = Daru::DataFrame.new({ :a => @a, :b => @b, :c => @c, :y => @y })
111
+ lr = Statsample::Regression::Multiple::GslEngine.new(ds, :y)
112
+ assert(lr.summary.size > 0)
113
+ model_test(lr, 'gsl')
# Expected fitted values, one per case, compared element by element.
114
+ predicted = [1.7857, 6.0989, 3.2433, 7.2908, 4.9667, 10.3428, 8.8158, 10.4717, 23.6639, 25.3198]
115
+ c_predicted = lr.predicted
116
+ predicted.each_index{|i|
117
+ assert_in_delta(predicted[i], c_predicted[i], 0.001)
118
+ }
# Expected residuals, one per case, compared element by element.
119
+ residuals = [1.2142, -2.0989, 1.7566, -1.29085, 2.033, -2.3428, 0.18414, -0.47177, -3.66395, 4.6801]
120
+ c_residuals = lr.residuals
121
+ residuals.each_index{|i|
122
+ assert_in_delta(residuals[i], c_residuals[i], 0.001)
123
+ }
124
+ else
125
+ skip 'Regression::Multiple::GslEngine not tested (no Gsl)'
126
+ end
127
+ end
128
+
129
# Shared assertions for a fitted multiple-regression engine on the a/b/c -> y fixture.
# lr   - the engine under test
# name - label interpolated into failure messages
# Covers coefficients, standardized coefficients, constant, r, r2, f, probability,
# tolerance of :a, coefficient standard errors and t values, sums of squares and summary.
# It does not check constant_se / constant_t; model_test adds those.
+ def model_test_matrix(lr, name = 'undefined')
130
+ stan_coeffs = { :a => 0.151, :b => -0.547, :c => 0.997 }
131
+ unstan_coeffs = { :a => 0.695, :b => -4.286, :c => 0.266 }
132
+
133
+ unstan_coeffs.each_key{|k|
134
+ assert_in_delta(unstan_coeffs[k], lr.coeffs[k], 0.001, "b coeffs - #{name}")
135
+ }
136
+
137
+ stan_coeffs.each_key{|k|
138
+ assert_in_delta(stan_coeffs[k], lr.standarized_coeffs[k], 0.001, "beta coeffs - #{name}")
139
+ }
140
+
141
+ assert_in_delta(11.027, lr.constant, 0.001)
142
+
143
+ assert_in_delta(0.955, lr.r, 0.001)
144
+ assert_in_delta(0.913, lr.r2, 0.001)
145
+
146
+ assert_in_delta(20.908, lr.f, 0.001)
147
+ assert_in_delta(0.001, lr.probability, 0.001)
148
+ assert_in_delta(0.226, lr.tolerance(:a), 0.001)
149
+
150
+ coeffs_se = { :a => 1.171, :b => 1.129, :c => 0.072 }
151
+
152
+ ccoeffs_se = lr.coeffs_se
153
+ coeffs_se.each_key{|k|
154
+ assert_in_delta(coeffs_se[k], ccoeffs_se[k], 0.001)
155
+ }
156
+ coeffs_t = { :a => 0.594, :b => -3.796, :c => 3.703 }
157
+ ccoeffs_t = lr.coeffs_t
158
+ coeffs_t.each_key{|k|
159
+ assert_in_delta(coeffs_t[k], ccoeffs_t[k], 0.001)
160
+ }
161
+
162
+ assert_in_delta(639.6, lr.sst, 0.001)
163
+ assert_in_delta(583.76, lr.ssr, 0.001)
164
+ assert_in_delta(55.840, lr.sse, 0.001)
165
+ assert(lr.summary.size > 0, "#{name} without summary")
166
+ end
167
+
168
# Full shared check: everything in model_test_matrix plus the constant's standard error
# and t value, and one prediction through lr.process.
# lr   - the engine under test
# name - label forwarded to model_test_matrix for failure messages
+ def model_test(lr, name = 'undefined')
169
+ model_test_matrix(lr, name)
170
+ assert_in_delta(4.559, lr.constant_se, 0.001)
171
+ assert_in_delta(2.419, lr.constant_t, 0.001)
172
+
# [1, 3, 11] equals the first complete a/b/c row of the fixtures used by the callers in this file.
173
+ assert_in_delta(1.785, lr.process([1, 3, 11]), 0.001)
174
+ end
175
+
176
# Exercises MatrixEngine twice on the a/b/c -> y fixture: once from a correlation matrix
# (means, case count and standard deviations passed explicitly) and once from a
# covariance matrix (means and case count only).
+ def test_regression_matrix
177
+ @a = Daru::Vector.new([1, 3, 2, 4, 3, 5, 4, 6, 5, 7])
178
+ @b = Daru::Vector.new([3, 3, 4, 4, 5, 5, 6, 6, 4, 4])
179
+ @c = Daru::Vector.new([11, 22, 30, 40, 50, 65, 78, 79, 99, 100])
180
+ @y = Daru::Vector.new([3, 4, 5, 6, 7, 8, 9, 10, 20, 30])
181
+ ds = Daru::DataFrame.new({ :a => @a, :b => @b, :c => @c, :y => @y })
182
+ cor = Statsample::Bivariate.correlation_matrix(ds)
183
+
184
+ lr = Statsample::Regression::Multiple::MatrixEngine.new(
185
+ cor, :y, y_mean: @y.mean,
186
+ x_mean: { :a => ds[:a].mean, :b => ds[:b].mean, :c => ds[:c].mean },
187
+ cases: @a.size, y_sd: @y.sd, x_sd: { :a => @a.sd, :b => @b.sd, :c => @c.sd })
# For the correlation-matrix engine the constant's SE and t are expected to be nil,
# so only model_test_matrix (which skips them) is run here.
188
+ assert_nil(lr.constant_se)
189
+ assert_nil(lr.constant_t)
190
+ model_test_matrix(lr, 'correlation matrix')
191
+
192
+ covariance = Statsample::Bivariate.covariance_matrix(ds)
193
+ lr = Statsample::Regression::Multiple::MatrixEngine.new(
194
+ covariance, :y, y_mean: @y.mean,
195
+ x_mean: { :a => ds[:a].mean, :b => ds[:b].mean, :c => ds[:c].mean }, cases: @a.size)
196
+ assert(lr.summary.size > 0)
197
+
# The covariance-matrix engine gets the full model_test, including constant_se / constant_t.
198
+ model_test(lr, 'covariance matrix')
199
+ end
200
+
201
# Runs the shared model checks on RubyEngine using the a/b/c -> y fixture with one extra
# leading case that is nil in every vector, then checks per-case predictions and residuals.
+ def test_regression_rubyengine
202
+ @a = Daru::Vector.new([nil, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7])
203
+ @b = Daru::Vector.new([nil, 3, 3, 4, 4, 5, 5, 6, 6, 4, 4])
204
+ @c = Daru::Vector.new([nil, 11, 22, 30, 40, 50, 65, 78, 79, 99, 100])
205
+ @y = Daru::Vector.new([nil, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30])
206
+ ds = Daru::DataFrame.new({ :a => @a, :b => @b, :c => @c, :y => @y })
207
+ lr = Statsample::Regression::Multiple::RubyEngine.new(ds, :y)
# 11 rows were supplied; the all-nil first row is expected not to count as valid.
208
+ assert_equal(11, lr.total_cases)
209
+ assert_equal(10, lr.valid_cases)
210
+ model_test(lr, 'rubyengine with missing data')
211
+
212
+ predicted = [nil, 1.7857, 6.0989, 3.2433, 7.2908, 4.9667, 10.3428, 8.8158, 10.4717, 23.6639, 25.3198]
213
+ c_predicted = lr.predicted
# A nil computed value is acceptable only where the expected value is nil too;
# otherwise the two numbers are compared within 0.001.
214
+ predicted.each_index do |i|
215
+ if c_predicted[i].nil?
216
+ assert(predicted[i].nil?, "Actual #{i} is nil, but expected #{predicted[i]}")
217
+ else
218
+ assert_in_delta(predicted[i], c_predicted[i], 0.001)
219
+ end
220
+ end
221
+ residuals = [nil, 1.2142, -2.0989, 1.7566, -1.29085, 2.033, -2.3428, 0.18414, -0.47177, -3.66395, 4.6801]
222
+ c_residuals = lr.residuals
# Same nil-aware comparison as for the predictions.
223
+ residuals.each_index do |i|
224
+ if c_residuals[i].nil?
225
+ assert(residuals[i].nil?)
226
+ else
227
+ assert_in_delta(residuals[i], c_residuals[i], 0.001)
228
+ end
229
+ end
230
+ end
231
+ end
@@ -0,0 +1,223 @@
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
+ class StatsampleReliabilityTestCase < Minitest::Test
3
+ context Statsample::Reliability do
4
# Reliability.sbp(r, n) with r = 0.6849 and n = 62/15 is expected to give 0.9 (within 0.001).
+ should 'return correct r according to Spearman-Brown prophecy' do
5
+ r = 0.6849
# Integer#quo yields an exact Rational (62/15) rather than truncating.
6
+ n = 62.quo(15)
7
+ assert_in_delta(0.9, Statsample::Reliability.sbp(r, n), 0.001)
8
+ end
9
# n_for_desired_reliability(0.6849, 0.9, 15) is expected to be 62, within a tolerance of 0.5.
+ should 'return correct n for desired realiability' do
10
+ r = 0.6849
11
+ r_d = 0.9
12
+ assert_in_delta(62, Statsample::Reliability.n_for_desired_reliability(r, r_d, 15), 0.5)
13
+ end
14
# Cross-checks the Cronbach's alpha helpers against each other and against the
# formula written out in the tests, using a randomly generated data frame.
+ context "Cronbach's alpha" do
15
+ setup do
16
+ @samples = 40
# rand(10) + 2 gives between 2 and 11 variables.
17
+ @n_variables = rand(10) + 2
18
+ @ds = Daru::DataFrame.new({}, index: @samples)
# Every column is the same random base vector plus fresh per-element noise.
19
+ base = Daru::Vector.new(@samples.times.collect { |_a| rand })
20
+ @n_variables.times do |i|
21
+ @ds[i] = Daru::Vector.new(base.collect { |v| v + rand })
22
+ end
23
+
24
+ @k = @ds.ncols
25
+ @cm = Statsample::Bivariate.covariance_matrix(@ds)
# @dse is a copy of @ds in which every vector has been replaced by its standardized version.
26
+ @dse = @ds.dup
27
+ @dse.vectors.each do |f|
28
+ @dse[f] = @dse[f].standardize
29
+ end
30
+ @cme = Statsample::Bivariate.covariance_matrix(@dse)
31
+ @a = Statsample::Reliability.cronbach_alpha(@ds)
32
+ @as = Statsample::Reliability.cronbach_alpha_standarized(@ds)
33
+ end
# expected = k/(k-1) * (1 - sum of item variances / total sum of the covariance matrix)
34
+ should 'alpha will be equal to sum of matrix covariance less the individual variances' do
35
+ total_sum = @cm.total_sum
36
+ ind_var = @ds.vectors.to_a.inject(0) { |ac, v| ac + @ds[v].variance }
37
+ expected = @k.quo(@k - 1) * (1 - (ind_var.quo(total_sum)))
38
+ assert_in_delta(expected, @a, 1e-10)
39
+ end
# ScaleAnalysis#alpha must agree with alpha rebuilt from item count, mean variance and mean covariance.
40
+ should 'method cronbach_alpha_from_n_s2_cov return correct values' do
41
+ sa = Statsample::Reliability::ScaleAnalysis.new(@ds)
42
+ vm, cm = sa.variances_mean, sa.covariances_mean
43
+ assert_in_delta(sa.alpha, Statsample::Reliability.cronbach_alpha_from_n_s2_cov(@n_variables, vm, cm), 1e-10)
44
+ end
45
+ should 'method cronbach_alpha_from_covariance_matrix returns correct value' do
46
+ cov = Statsample::Bivariate.covariance_matrix(@ds)
47
+ assert_in_delta(@a, Statsample::Reliability.cronbach_alpha_from_covariance_matrix(cov), 0.0000001)
48
+ end
# Round trip: the n returned for alpha @a, fed back into cronbach_alpha_from_n_s2_cov,
# must reproduce @a within 0.001.
49
+ should 'return correct n for desired alpha, covariance and variance' do
50
+ sa = Statsample::Reliability::ScaleAnalysis.new(@ds)
51
+ vm, cm = sa.variances_mean, sa.covariances_mean
52
+ n_obtained = Statsample::Reliability.n_for_desired_alpha(@a, vm, cm)
53
+ # p n_obtained
54
+ assert_in_delta(Statsample::Reliability.cronbach_alpha_from_n_s2_cov(n_obtained, vm, cm), @a, 0.001)
55
+ end
56
+
# Same formula as the first test, applied to the standardized copy (@dse / @cme).
57
+ should 'standarized alpha will be equal to sum of matrix covariance less the individual variances on standarized values' do
58
+ total_sum = @cme.total_sum
59
+ ind_var = @dse.vectors.to_a.inject(0) { |ac, v| ac + @dse[v].variance }
60
+ expected = @k.quo(@k - 1) * (1 - (ind_var.quo(total_sum)))
61
+ assert_in_delta(expected, @as, 1e-10)
62
+ end
63
+ end
64
# Tests for ItemCharacteristicCurve on three random integer items (:a, :b, :c).
+ context Statsample::Reliability::ItemCharacteristicCurve do
65
+ setup do
66
+ @samples = 100
# Items take integer values 0 .. @points - 1, so the largest possible
# three-item total is (@points - 1) * 3.
67
+ @points = rand(10) + 3
68
+ @max_point = (@points - 1) * 3
69
+ @x1 = Daru::Vector.new(@samples.times.map { rand(@points) })
70
+ @x2 = Daru::Vector.new(@samples.times.map { rand(@points) })
71
+ @x3 = Daru::Vector.new(@samples.times.map { rand(@points) })
72
+ @ds = Daru::DataFrame.new({ :a => @x1, :b => @x2, :c => @x3 })
73
+ @icc = Statsample::Reliability::ItemCharacteristicCurve.new(@ds)
74
+ end
# Without an explicit total vector, vector_total must equal the frame's vector_sum.
75
+ should 'have a correct automatic vector_total' do
76
+ assert_equal(@ds.vector_sum, @icc.vector_total)
77
+ end
# An explicit total vector is used as given; one with a different number of
# cases (@samples + 10) must raise ArgumentError.
78
+ should 'have a correct different vector_total' do
79
+ x2 = Daru::Vector.new(@samples.times.map { rand(10) })
80
+ @icc = Statsample::Reliability::ItemCharacteristicCurve.new(@ds, x2)
81
+ assert_equal(x2, @icc.vector_total)
82
+ assert_raises(ArgumentError) do
83
+ inc = Daru::Vector.new((@samples + 10).times.map { rand(10) })
84
+ @icc = Statsample::Reliability::ItemCharacteristicCurve.new(@ds, inc)
85
+ end
86
+ end
87
+ should 'have 0% for 0 points on maximum value values' do
88
+ max = @icc.curve_field(:a, 0)[@max_point.to_f]
# A missing entry (nil) is treated as 0 before comparing.
89
+ max ||= 0
90
+ assert_in_delta(0, max)
91
+ end
92
+ should 'have 0 for max value on minimum value' do
93
+ max = @icc.curve_field(:a, @max_point)[0.0]
# A missing entry (nil) is treated as 0 before comparing.
94
+ max ||= 0
95
+ assert_in_delta(0, max)
96
+ end
# Recomputes, for a randomly chosen item value, the share of cases at each total
# score whose :a value equals it, and compares with curve_field(:a, index).
97
+ should 'have correct values of % for any value' do
98
+ sum = @icc.vector_total
99
+ total = {}
100
+ total_g = sum.frequencies
101
+ index = rand(@points)
# Every observed total gets a key (starting at 0); it is incremented only for matching cases.
102
+ @x1.each_with_index do |v, i|
103
+ total[sum[i]] ||= 0
104
+ total[sum[i]] += 1 if v == index
105
+ end
# Hash#each returns its receiver, so `expected` is `total` itself after the
# counts have been replaced in place by count / frequency of that total.
106
+ expected = total.each {|k, v|
107
+ total[k] = v.quo(total_g[k])
108
+ }
109
+ assert_equal(expected, @icc.curve_field(:a, index))
110
+ end
111
+ end
112
+
113
+ context Statsample::Reliability::MultiScaleAnalysis do
114
+ setup do
115
+ size = 100
116
+ @scales = 3
117
+ @items_per_scale = 10
118
+ h = {}
119
+ @scales.times {|s|
120
+ @items_per_scale.times {|i|
121
+ h["#{s}_#{i}".to_sym] = Daru::Vector.new((size.times.map { (s * 2) + rand }))
122
+ }
123
+ }
124
+ @ds = Daru::DataFrame.new(h)
125
+ @msa = Statsample::Reliability::MultiScaleAnalysis.new(name: 'Multiple Analysis') do |m|
126
+ m.scale 'complete', @ds
127
+ @scales.times {|s|
128
+ m.scale "scale_#{s}", @ds.clone(*@items_per_scale.times.map { |i| "#{s}_#{i}".to_sym }), name: "Scale #{s}"
129
+ }
130
+ end
131
+ end
132
+
133
+ should 'Retrieve correct ScaleAnalysis for whole scale' do
134
+ sa = Statsample::Reliability::ScaleAnalysis.new(@ds, name: 'Scale complete')
135
+ assert_equal(sa.variances_mean, @msa.scale('complete').variances_mean)
136
+ end
137
+ should 'Retrieve correct ScaleAnalysis for each scale' do
138
+ @scales.times {|s|
139
+ sa = Statsample::Reliability::ScaleAnalysis.new(@ds.dup(@items_per_scale.times.map { |i| "#{s}_#{i}".to_sym }), name: "Scale #{s}")
140
+ assert_equal(sa.variances_mean, @msa.scale("scale_#{s}").variances_mean)
141
+ }
142
+ end
143
+ should 'retrieve correct correlation matrix for each scale' do
144
+ vectors = { :complete => @ds.vector_sum }
145
+ @scales.times {|s|
146
+ vectors["scale_#{s}".to_sym] = @ds.dup(@items_per_scale.times.map { |i| "#{s}_#{i}".to_sym }).vector_sum
147
+ }
148
+ ds2 = Daru::DataFrame.new(vectors)
149
+ assert_equal(Statsample::Bivariate.correlation_matrix(ds2), @msa.correlation_matrix)
150
+ end
151
+ should 'delete scale using delete_scale' do
152
+ @msa.delete_scale('complete')
153
+ assert_equal(@msa.scales.keys.sort, @scales.times.map { |s| "scale_#{s}" })
154
+ end
155
+ should 'retrieve pca for scales' do
156
+ @msa.delete_scale('complete')
157
+ vectors = {}
158
+ @scales.times {|s|
159
+ vectors["scale_#{s}".to_sym] = @ds.dup(@items_per_scale.times.map { |i| "#{s}_#{i}".to_sym }).vector_sum
160
+ }
161
+ ds2 = Daru::DataFrame.new(vectors)
162
+ cor_matrix = Statsample::Bivariate.correlation_matrix(ds2)
163
+ m = 3
164
+ pca = Statsample::Factor::PCA.new(cor_matrix, m: m)
165
+ assert_equal(pca.component_matrix, @msa.pca(m: m).component_matrix)
166
+ end
167
+ should 'retrieve acceptable summary' do
168
+ @msa.delete_scale('scale_0')
169
+ @msa.delete_scale('scale_1')
170
+ @msa.delete_scale('scale_2')
171
+
172
+ # @msa.summary_correlation_matrix=true
173
+ # @msa.summary_pca=true
174
+
175
+ assert(@msa.summary.size > 0)
176
+ end
177
+ end
178
# Tests for ScaleAnalysis on a fixed four-item fixture (12 cases per item; the last
# case of every item is far larger than the others).
+ context Statsample::Reliability::ScaleAnalysis do
179
+ setup do
180
+ @x1 = Daru::Vector.new([1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 30])
181
+ @x2 = Daru::Vector.new([1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 50])
182
+ @x3 = Daru::Vector.new([2, 2, 1, 1, 1, 2, 2, 2, 3, 4, 5, 40])
183
+ @x4 = Daru::Vector.new([1, 2, 3, 4, 4, 4, 4, 3, 4, 4, 5, 30])
184
+ @ds = Daru::DataFrame.new({ :x1 => @x1, :x2 => @x2, :x3 => @x3, :x4 => @x4 })
185
+ @ia = Statsample::Reliability::ScaleAnalysis.new(@ds)
186
+ @cov_matrix = @ia.cov_m
187
+ end
188
+ should 'return correct values for item analysis' do
189
+ assert_in_delta(0.980, @ia.alpha, 0.001)
190
+ assert_in_delta(0.999, @ia.alpha_standarized, 0.001)
# Mean of the diagonal of the analysis' own covariance matrix.
191
+ var_mean = Daru::Vector.new(4.times.map { |m| @cov_matrix[m, m] }).mean
192
+ assert_in_delta(var_mean, @ia.variances_mean)
193
+ assert_equal(@x1.mean, @ia.item_statistics[:x1][:mean])
194
+ assert_equal(@x4.mean, @ia.item_statistics[:x4][:mean])
195
+ assert_in_delta(@x1.sds, @ia.item_statistics[:x1][:sds], 1e-14)
196
+ assert_in_delta(@x4.sds, @ia.item_statistics[:x4][:sds], 1e-14)
# "If deleted" statistics for :x1 are checked against a copy of the frame with :x1 removed.
197
+ ds2 = @ds.clone
198
+ ds2.delete_vector(:x1)
199
+ vector_sum = ds2.vector_sum
200
+ assert_equal(vector_sum.mean, @ia.stats_if_deleted[:x1][:mean])
201
+ assert_equal(vector_sum.sds, @ia.stats_if_deleted[:x1][:sds])
202
+ assert_in_delta(vector_sum.variance, @ia.stats_if_deleted[:x1][:variance_sample], 1e-10)
203
+
204
+ assert_equal(Statsample::Reliability.cronbach_alpha(ds2), @ia.stats_if_deleted[:x1][:alpha])
205
+
# Collects every off-diagonal entry of the 4x4 covariance matrix.
206
+ covariances = []
207
+ 4.times.each {|i|
208
+ 4.times.each {|j|
209
+ if i != j
210
+ covariances.push(@cov_matrix[i, j])
211
+ end
212
+ }
213
+ }
214
+ assert_in_delta(Daru::Vector.new(covariances).mean, @ia.covariances_mean)
215
+ assert_in_delta(0.999, @ia.item_total_correlation[:x1], 0.001)
216
+ assert_in_delta(1050.455, @ia.stats_if_deleted[:x1][:variance_sample], 0.001)
217
+ end
218
+ should 'return a summary' do
219
+ assert(@ia.summary.size > 0)
220
+ end
221
+ end
222
+ end
223
+ end