statsample-ekatena 2.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (156) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.travis.yml +23 -0
  4. data/CONTRIBUTING.md +17 -0
  5. data/Gemfile +2 -0
  6. data/History.txt +457 -0
  7. data/LICENSE.txt +12 -0
  8. data/README.md +175 -0
  9. data/Rakefile +44 -0
  10. data/benchmarks/correlation_matrix_15_variables.rb +32 -0
  11. data/benchmarks/correlation_matrix_5_variables.rb +33 -0
  12. data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
  13. data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
  14. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +71 -0
  15. data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
  16. data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
  17. data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
  18. data/benchmarks/correlation_matrix_methods/results.ds +0 -0
  19. data/benchmarks/factor_map.rb +37 -0
  20. data/benchmarks/helpers_benchmark.rb +5 -0
  21. data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
  22. data/doc_latex/manual/equations.tex +78 -0
  23. data/examples/boxplot.rb +28 -0
  24. data/examples/chisquare_test.rb +23 -0
  25. data/examples/correlation_matrix.rb +32 -0
  26. data/examples/dataset.rb +30 -0
  27. data/examples/dominance_analysis.rb +33 -0
  28. data/examples/dominance_analysis_bootstrap.rb +32 -0
  29. data/examples/histogram.rb +26 -0
  30. data/examples/icc.rb +24 -0
  31. data/examples/levene.rb +29 -0
  32. data/examples/multiple_regression.rb +20 -0
  33. data/examples/multivariate_correlation.rb +33 -0
  34. data/examples/parallel_analysis.rb +40 -0
  35. data/examples/polychoric.rb +40 -0
  36. data/examples/principal_axis.rb +26 -0
  37. data/examples/reliability.rb +31 -0
  38. data/examples/scatterplot.rb +25 -0
  39. data/examples/t_test.rb +27 -0
  40. data/examples/tetrachoric.rb +17 -0
  41. data/examples/u_test.rb +24 -0
  42. data/examples/vector.rb +20 -0
  43. data/examples/velicer_map_test.rb +46 -0
  44. data/grab_references.rb +29 -0
  45. data/lib/spss.rb +134 -0
  46. data/lib/statsample-ekatena/analysis.rb +100 -0
  47. data/lib/statsample-ekatena/analysis/suite.rb +89 -0
  48. data/lib/statsample-ekatena/analysis/suitereportbuilder.rb +44 -0
  49. data/lib/statsample-ekatena/anova.rb +24 -0
  50. data/lib/statsample-ekatena/anova/contrast.rb +79 -0
  51. data/lib/statsample-ekatena/anova/oneway.rb +187 -0
  52. data/lib/statsample-ekatena/anova/twoway.rb +207 -0
  53. data/lib/statsample-ekatena/bivariate.rb +406 -0
  54. data/lib/statsample-ekatena/bivariate/pearson.rb +54 -0
  55. data/lib/statsample-ekatena/codification.rb +182 -0
  56. data/lib/statsample-ekatena/converter/csv.rb +28 -0
  57. data/lib/statsample-ekatena/converter/spss.rb +48 -0
  58. data/lib/statsample-ekatena/converters.rb +211 -0
  59. data/lib/statsample-ekatena/crosstab.rb +188 -0
  60. data/lib/statsample-ekatena/daru.rb +115 -0
  61. data/lib/statsample-ekatena/dataset.rb +10 -0
  62. data/lib/statsample-ekatena/dominanceanalysis.rb +425 -0
  63. data/lib/statsample-ekatena/dominanceanalysis/bootstrap.rb +232 -0
  64. data/lib/statsample-ekatena/factor.rb +104 -0
  65. data/lib/statsample-ekatena/factor/map.rb +124 -0
  66. data/lib/statsample-ekatena/factor/parallelanalysis.rb +166 -0
  67. data/lib/statsample-ekatena/factor/pca.rb +242 -0
  68. data/lib/statsample-ekatena/factor/principalaxis.rb +243 -0
  69. data/lib/statsample-ekatena/factor/rotation.rb +198 -0
  70. data/lib/statsample-ekatena/formula/fit_model.rb +46 -0
  71. data/lib/statsample-ekatena/formula/formula.rb +306 -0
  72. data/lib/statsample-ekatena/graph.rb +11 -0
  73. data/lib/statsample-ekatena/graph/boxplot.rb +236 -0
  74. data/lib/statsample-ekatena/graph/histogram.rb +198 -0
  75. data/lib/statsample-ekatena/graph/scatterplot.rb +213 -0
  76. data/lib/statsample-ekatena/histogram.rb +180 -0
  77. data/lib/statsample-ekatena/matrix.rb +329 -0
  78. data/lib/statsample-ekatena/multiset.rb +310 -0
  79. data/lib/statsample-ekatena/regression.rb +65 -0
  80. data/lib/statsample-ekatena/regression/multiple.rb +89 -0
  81. data/lib/statsample-ekatena/regression/multiple/alglibengine.rb +128 -0
  82. data/lib/statsample-ekatena/regression/multiple/baseengine.rb +251 -0
  83. data/lib/statsample-ekatena/regression/multiple/gslengine.rb +129 -0
  84. data/lib/statsample-ekatena/regression/multiple/matrixengine.rb +205 -0
  85. data/lib/statsample-ekatena/regression/multiple/rubyengine.rb +86 -0
  86. data/lib/statsample-ekatena/regression/simple.rb +121 -0
  87. data/lib/statsample-ekatena/reliability.rb +150 -0
  88. data/lib/statsample-ekatena/reliability/icc.rb +415 -0
  89. data/lib/statsample-ekatena/reliability/multiscaleanalysis.rb +181 -0
  90. data/lib/statsample-ekatena/reliability/scaleanalysis.rb +233 -0
  91. data/lib/statsample-ekatena/reliability/skillscaleanalysis.rb +114 -0
  92. data/lib/statsample-ekatena/resample.rb +15 -0
  93. data/lib/statsample-ekatena/shorthand.rb +125 -0
  94. data/lib/statsample-ekatena/srs.rb +169 -0
  95. data/lib/statsample-ekatena/test.rb +82 -0
  96. data/lib/statsample-ekatena/test/bartlettsphericity.rb +45 -0
  97. data/lib/statsample-ekatena/test/chisquare.rb +73 -0
  98. data/lib/statsample-ekatena/test/f.rb +52 -0
  99. data/lib/statsample-ekatena/test/kolmogorovsmirnov.rb +63 -0
  100. data/lib/statsample-ekatena/test/levene.rb +88 -0
  101. data/lib/statsample-ekatena/test/t.rb +309 -0
  102. data/lib/statsample-ekatena/test/umannwhitney.rb +208 -0
  103. data/lib/statsample-ekatena/test/wilcoxonsignedrank.rb +90 -0
  104. data/lib/statsample-ekatena/vector.rb +19 -0
  105. data/lib/statsample-ekatena/version.rb +3 -0
  106. data/lib/statsample.rb +282 -0
  107. data/po/es/statsample.mo +0 -0
  108. data/po/es/statsample.po +959 -0
  109. data/po/statsample.pot +947 -0
  110. data/references.txt +24 -0
  111. data/statsample-ekatena.gemspec +49 -0
  112. data/test/fixtures/bank2.dat +200 -0
  113. data/test/fixtures/correlation_matrix.rb +17 -0
  114. data/test/fixtures/df.csv +15 -0
  115. data/test/fixtures/hartman_23.matrix +9 -0
  116. data/test/fixtures/stock_data.csv +500 -0
  117. data/test/fixtures/tetmat_matrix.txt +5 -0
  118. data/test/fixtures/tetmat_test.txt +1001 -0
  119. data/test/helpers_tests.rb +83 -0
  120. data/test/test_analysis.rb +176 -0
  121. data/test/test_anova_contrast.rb +36 -0
  122. data/test/test_anovaoneway.rb +26 -0
  123. data/test/test_anovatwoway.rb +37 -0
  124. data/test/test_anovatwowaywithdataset.rb +47 -0
  125. data/test/test_anovawithvectors.rb +102 -0
  126. data/test/test_awesome_print_bug.rb +16 -0
  127. data/test/test_bartlettsphericity.rb +25 -0
  128. data/test/test_bivariate.rb +164 -0
  129. data/test/test_codification.rb +78 -0
  130. data/test/test_crosstab.rb +67 -0
  131. data/test/test_dominance_analysis.rb +39 -0
  132. data/test/test_factor.rb +228 -0
  133. data/test/test_factor_map.rb +38 -0
  134. data/test/test_factor_pa.rb +56 -0
  135. data/test/test_fit_model.rb +88 -0
  136. data/test/test_ggobi.rb +35 -0
  137. data/test/test_gsl.rb +15 -0
  138. data/test/test_histogram.rb +109 -0
  139. data/test/test_matrix.rb +48 -0
  140. data/test/test_multiset.rb +176 -0
  141. data/test/test_regression.rb +231 -0
  142. data/test/test_reliability.rb +223 -0
  143. data/test/test_reliability_icc.rb +198 -0
  144. data/test/test_reliability_skillscale.rb +57 -0
  145. data/test/test_resample.rb +24 -0
  146. data/test/test_srs.rb +9 -0
  147. data/test/test_statistics.rb +69 -0
  148. data/test/test_stest.rb +69 -0
  149. data/test/test_stratified.rb +17 -0
  150. data/test/test_test_f.rb +33 -0
  151. data/test/test_test_kolmogorovsmirnov.rb +34 -0
  152. data/test/test_test_t.rb +62 -0
  153. data/test/test_umannwhitney.rb +27 -0
  154. data/test/test_vector.rb +12 -0
  155. data/test/test_wilcoxonsignedrank.rb +64 -0
  156. metadata +570 -0
@@ -0,0 +1,16 @@
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
+ class StatsampleAwesomePrintBug < Minitest::Test
3
+ context('Awesome Print integration') do
4
+ setup do
5
+ require 'awesome_print'
6
+ end
7
+ should 'should be flawless' do
8
+ a = Daru::Vector.new([1, 2, 3])
9
+
10
+ assert(a != [1, 2, 3])
11
+ assert_nothing_raised do
12
+ ap a
13
+ end
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,25 @@
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
+
3
+ class StatsampleBartlettSphericityTestCase < Minitest::Test
4
+ include Statsample::Test
5
+ context Statsample::Test::BartlettSphericity do
6
+ setup do
7
+ @v1 = Daru::Vector.new([1, 2, 3, 4, 7, 8, 9, 10, 14, 15, 20, 50, 60, 70])
8
+ @v2 = Daru::Vector.new([5, 6, 11, 12, 13, 16, 17, 18, 19, 20, 30, 0, 0, 0])
9
+ @v3 = Daru::Vector.new([10, 3, 20, 30, 40, 50, 80, 10, 20, 30, 40, 2, 3, 4])
10
+ # KMO: 0.490
11
+ ds = Daru::DataFrame.new({ :v1 => @v1, :v2 => @v2, :v3 => @v3 })
12
+ cor = Statsample::Bivariate.correlation_matrix(ds)
13
+ @bs = Statsample::Test::BartlettSphericity.new(cor, 14)
14
+ end
15
+ should 'have correct value for chi' do
16
+ assert_in_delta(9.477, @bs.value, 0.001)
17
+ end
18
+ should 'have correct value for df' do
19
+ assert_equal(3, @bs.df)
20
+ end
21
+ should 'have correct value for probability' do
22
+ assert_in_delta(0.024, @bs.probability, 0.001)
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,164 @@
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
+ class StatsampleBivariateTestCase < Minitest::Test
3
+ should 'method sum of squares should be correct' do
4
+ v1 = Daru::Vector.new([1, 2, 3, 4, 5, 6])
5
+ v2 = Daru::Vector.new([6, 2, 4, 10, 12, 8])
6
+ assert_equal(23.0, Statsample::Bivariate.sum_of_squares(v1, v2))
7
+ end
8
+ should_with_gsl 'return same covariance with ruby and gls implementation' do
9
+ v1 = Daru::Vector.new(20.times.collect { |_a| rand })
10
+ v2 = Daru::Vector.new(20.times.collect { |_a| rand })
11
+ assert_in_delta(Statsample::Bivariate.covariance(v1, v2), Statsample::Bivariate.covariance_slow(v1, v2), 0.001)
12
+ end
13
+
14
+ should_with_gsl 'return same correlation with ruby and gls implementation' do
15
+ v1 = Daru::Vector.new(20.times.collect { |_a| rand })
16
+ v2 = Daru::Vector.new(20.times.collect { |_a| rand })
17
+
18
+ assert_in_delta(GSL::Stats.correlation(v1.to_gsl, v2.to_gsl), Statsample::Bivariate.pearson_slow(v1, v2), 1e-10)
19
+ end
20
+ should 'return correct pearson correlation' do
21
+ v1 = Daru::Vector.new([6, 5, 4, 7, 8, 4, 3, 2])
22
+ v2 = Daru::Vector.new([2, 3, 7, 8, 6, 4, 3, 2])
23
+ assert_in_delta(0.525, Statsample::Bivariate.pearson(v1, v2), 0.001)
24
+ assert_in_delta(0.525, Statsample::Bivariate.pearson_slow(v1, v2), 0.001)
25
+
26
+ v3 = Daru::Vector.new([6, 2, 1000, 1000, 5, 4, 7, 8, 4, 3, 2, nil])
27
+ v4 = Daru::Vector.new([2, nil, nil, nil, 3, 7, 8, 6, 4, 3, 2, 500])
28
+ assert_in_delta(0.525, Statsample::Bivariate.pearson(v3, v4), 0.001)
29
+ # Test ruby method
30
+ v3a, v4a = Statsample.only_valid v3, v4
31
+ assert_in_delta(0.525, Statsample::Bivariate.pearson_slow(v3a, v4a), 0.001)
32
+ end
33
+ should 'return correct values for t_pearson and prop_pearson' do
34
+ v1 = Daru::Vector.new([6, 5, 4, 7, 8, 4, 3, 2])
35
+ v2 = Daru::Vector.new([2, 3, 7, 8, 6, 4, 3, 2])
36
+ r = Statsample::Bivariate::Pearson.new(v1, v2)
37
+ assert_in_delta(0.525, r.r, 0.001)
38
+ assert_in_delta(Statsample::Bivariate.t_pearson(v1, v2), r.t, 0.001)
39
+ assert_in_delta(Statsample::Bivariate.prop_pearson(r.t, 8, :both), r.probability, 0.001)
40
+ assert(r.summary.size > 0)
41
+ end
42
+ should 'return correct correlation_matrix with nils values' do
43
+ v1 = Daru::Vector.new([6, 5, 4, 7, 8, 4, 3, 2])
44
+ v2 = Daru::Vector.new([2, 3, 7, 8, 6, 4, 3, 2])
45
+ v3 = Daru::Vector.new([6, 2, 1000, 1000, 5, 4, 7, 8])
46
+ v4 = Daru::Vector.new([2, nil, nil, nil, 3, 7, 8, 6])
47
+ ds = Daru::DataFrame.new({ :v1 => v1, :v2 => v2, :v3 => v3, :v4 => v4 })
48
+ c = proc { |n1, n2| Statsample::Bivariate.pearson(n1, n2) }
49
+ expected = Matrix[[c.call(v1, v1), c.call(v1, v2), c.call(v1, v3), c.call(v1, v4)], [c.call(v2, v1), c.call(v2, v2), c.call(v2, v3), c.call(v2, v4)], [c.call(v3, v1), c.call(v3, v2), c.call(v3, v3), c.call(v3, v4)],
50
+ [c.call(v4, v1), c.call(v4, v2), c.call(v4, v3), c.call(v4, v4)]
51
+ ]
52
+ obt = Statsample::Bivariate.correlation_matrix(ds)
53
+ for i in 0...expected.row_size
54
+ for j in 0...expected.column_size
55
+ # puts expected[i,j].inspect
56
+ # puts obt[i,j].inspect
57
+ assert_in_delta(expected[i, j], obt[i, j], 0.0001, "#{expected[i, j].class}!=#{obt[i, j].class} ")
58
+ end
59
+ end
60
+ # assert_equal(expected,obt)
61
+ end
62
+ should_with_gsl 'return same values for optimized and pairwise covariance matrix' do
63
+ cases = 100
64
+ v1 = Daru::Vector.new_with_size(cases) { rand }
65
+ v2 = Daru::Vector.new_with_size(cases) { rand }
66
+ v3 = Daru::Vector.new_with_size(cases) { rand }
67
+ v4 = Daru::Vector.new_with_size(cases) { rand }
68
+ v5 = Daru::Vector.new_with_size(cases) { rand }
69
+
70
+ ds = Daru::DataFrame.new({ :v1 => v1, :v2 => v2, :v3 => v3, :v4 => v4, :v5 => v5 })
71
+
72
+ cor_opt = Statsample::Bivariate.covariance_matrix_optimized(ds)
73
+
74
+ cor_pw = Statsample::Bivariate.covariance_matrix_pairwise(ds)
75
+ assert_equal_matrix(cor_opt, cor_pw, 1e-15)
76
+ end
77
+ should_with_gsl 'return same values for optimized and pairwise correlation matrix' do
78
+ cases = 100
79
+ v1 = Daru::Vector.new_with_size(cases) { rand }
80
+ v2 = Daru::Vector.new_with_size(cases) { rand }
81
+ v3 = Daru::Vector.new_with_size(cases) { rand }
82
+ v4 = Daru::Vector.new_with_size(cases) { rand }
83
+ v5 = Daru::Vector.new_with_size(cases) { rand }
84
+
85
+ ds = Daru::DataFrame.new({
86
+ :v1 => v1, :v2 => v2, :v3 => v3, :v4 => v4, :v5 => v5 })
87
+
88
+ cor_opt = Statsample::Bivariate.correlation_matrix_optimized(ds)
89
+
90
+ cor_pw = Statsample::Bivariate.correlation_matrix_pairwise(ds)
91
+ assert_equal_matrix(cor_opt, cor_pw, 1e-15)
92
+ end
93
+ should 'return correct correlation_matrix without nils values' do
94
+ v1 = Daru::Vector.new([6, 5, 4, 7, 8, 4, 3, 2])
95
+ v2 = Daru::Vector.new([2, 3, 7, 8, 6, 4, 3, 2])
96
+ v3 = Daru::Vector.new([6, 2, 1000, 1000, 5, 4, 7, 8])
97
+ v4 = Daru::Vector.new([2, 4, 6, 7, 3, 7, 8, 6])
98
+ ds = Daru::DataFrame.new({ :v1 => v1, :v2 => v2, :v3 => v3, :v4 => v4 })
99
+ c = proc { |n1, n2| Statsample::Bivariate.pearson(n1, n2) }
100
+ expected = Matrix[[c.call(v1, v1), c.call(v1, v2), c.call(v1, v3), c.call(v1, v4)], [c.call(v2, v1), c.call(v2, v2), c.call(v2, v3), c.call(v2, v4)], [c.call(v3, v1), c.call(v3, v2), c.call(v3, v3), c.call(v3, v4)],
101
+ [c.call(v4, v1), c.call(v4, v2), c.call(v4, v3), c.call(v4, v4)]
102
+ ]
103
+ obt = Statsample::Bivariate.correlation_matrix(ds)
104
+ for i in 0...expected.row_size
105
+ for j in 0...expected.column_size
106
+ # puts expected[i,j].inspect
107
+ # puts obt[i,j].inspect
108
+ assert_in_delta(expected[i, j], obt[i, j], 0.0001, "#{expected[i, j].class}!=#{obt[i, j].class} ")
109
+ end
110
+ end
111
+ # assert_equal(expected,obt)
112
+ end
113
+
114
+ should 'return correct value for prop pearson' do
115
+ assert_in_delta(0.42, Statsample::Bivariate.prop_pearson(Statsample::Bivariate.t_r(0.084, 94), 94), 0.01)
116
+ assert_in_delta(0.65, Statsample::Bivariate.prop_pearson(Statsample::Bivariate.t_r(0.046, 95), 95), 0.01)
117
+ r = 0.9
118
+ n = 100
119
+ t = Statsample::Bivariate.t_r(r, n)
120
+ assert(Statsample::Bivariate.prop_pearson(t, n, :both) < 0.05)
121
+ assert(Statsample::Bivariate.prop_pearson(t, n, :right) < 0.05)
122
+ assert(Statsample::Bivariate.prop_pearson(t, n, :left) > 0.05)
123
+
124
+ r = -0.9
125
+ n = 100
126
+ t = Statsample::Bivariate.t_r(r, n)
127
+ assert(Statsample::Bivariate.prop_pearson(t, n, :both) < 0.05)
128
+ assert(Statsample::Bivariate.prop_pearson(t, n, :right) > 0.05)
129
+ assert(Statsample::Bivariate.prop_pearson(t, n, :left) < 0.05)
130
+ end
131
+
132
+ should "return correct value for Spearman's rho" do
133
+ v1 =Daru::Vector.new( [86, 97, 99, 100, 101, 103, 106, 110, 112, 113])
134
+ v2 =Daru::Vector.new( [0, 20, 28, 27, 50, 29, 7, 17, 6, 12])
135
+ assert_in_delta(-0.175758, Statsample::Bivariate.spearman(v1, v2), 0.0001)
136
+ end
137
+ should 'return correct value for point_biserial correlation' do
138
+ c = Daru::Vector.new([1, 3, 5, 6, 7, 100, 200, 300, 400, 300])
139
+ d = Daru::Vector.new([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
140
+ assert_raises TypeError do
141
+ Statsample::Bivariate.point_biserial(c, d)
142
+ end
143
+ assert_in_delta(Statsample::Bivariate.point_biserial(d, c), Statsample::Bivariate.pearson(d, c), 0.0001)
144
+ end
145
+ should 'return correct value for tau_a and tau_b' do
146
+ v1 = Daru::Vector.new([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
147
+ v2 = Daru::Vector.new([1, 3, 4, 5, 7, 8, 2, 9, 10, 6, 11])
148
+ assert_in_delta(0.6727, Statsample::Bivariate.tau_a(v1, v2), 0.001)
149
+ assert_in_delta(0.6727, Statsample::Bivariate.tau_b((Statsample::Crosstab.new(v1, v2).to_matrix)), 0.001)
150
+ v1 = Daru::Vector.new([12, 14, 14, 17, 19, 19, 19, 19, 19, 20, 21, 21, 21, 21, 21, 22, 23, 24, 24, 24, 26, 26, 27])
151
+ v2 = Daru::Vector.new([11, 4, 4, 2, 0, 0, 0, 0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0])
152
+ assert_in_delta(-0.376201540231705, Statsample::Bivariate.tau_b(Statsample::Crosstab.new(v1, v2).to_matrix), 0.001)
153
+ end
154
+ should 'return correct value for gamma correlation' do
155
+ m = Matrix[[10, 5, 2], [10, 15, 20]]
156
+ assert_in_delta(0.636, Statsample::Bivariate.gamma(m), 0.001)
157
+ m2 = Matrix[[15, 12, 6, 5], [12, 8, 10, 8], [4, 6, 9, 10]]
158
+ assert_in_delta(0.349, Statsample::Bivariate.gamma(m2), 0.001)
159
+ end
160
+
161
+ should 'return correct residuals' do
162
+ # TODO: test Statsample::Bivariate.residuals
163
+ end
164
+ end
@@ -0,0 +1,78 @@
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
+ class StatsampleCodificationTestCase < Minitest::Test
3
+ def initialize(*args)
4
+ v1 = Daru::Vector.new(%w(run walk,run walking running sleep sleeping,dreaming sleep,dream))
5
+ @dict = { 'run' => 'r', 'walk' => 'w', 'walking' => 'w', 'running' => 'r', 'sleep' => 's', 'sleeping' => 's', 'dream' => 'd', 'dreaming' => 'd' }
6
+ @ds = Daru::DataFrame.new({ :v1 => v1 })
7
+ super
8
+ end
9
+
10
+ def test_create_hash
11
+ expected_keys_v1 = %w(run walk walking running sleep sleeping dream dreaming).sort
12
+ hash = Statsample::Codification.create_hash(@ds, [:v1])
13
+ assert_equal([:v1], hash.keys)
14
+ assert_equal(expected_keys_v1, hash[:v1].keys.sort)
15
+ assert_equal(expected_keys_v1, hash[:v1].values.sort)
16
+ end
17
+
18
+ def test_create_excel
19
+ filename = Dir.tmpdir + '/test_excel' + Time.now.to_s + '.xls'
20
+ # filename = Tempfile.new("test_codification_"+Time.now().to_s)
21
+ Statsample::Codification.create_excel(@ds, ['v1'], filename)
22
+ field = Daru::Vector.new(['v1'] * 8, name: :field)
23
+ keys = Daru::Vector.new(%w(dream dreaming run running sleep sleeping walk walking))
24
+ ds = Daru::DataFrame.from_excel(filename)
25
+ assert_equal(field, ds[:field])
26
+ assert_equal(keys, ds[:original])
27
+ assert_equal(keys, ds[:recoded])
28
+ hash = Statsample::Codification.excel_to_recoded_hash(filename)
29
+ assert_equal(keys.to_a, hash[:v1].keys.sort)
30
+ assert_equal(keys.to_a, hash[:v1].values.sort)
31
+ end
32
+
33
+ def test_create_yaml
34
+ assert_raise ArgumentError do
35
+ Statsample::Codification.create_yaml(@ds, [])
36
+ end
37
+ expected_keys_v1 = %w(run walk walking running sleep sleeping dream dreaming).sort
38
+ yaml_hash = Statsample::Codification.create_yaml(@ds, [:v1])
39
+ h = YAML.load(yaml_hash)
40
+ assert_equal([:v1], h.keys)
41
+ assert_equal(expected_keys_v1, h[:v1].keys.sort)
42
+ tf = Tempfile.new('test_codification')
43
+ yaml_hash = Statsample::Codification.create_yaml(@ds, [:v1], tf, Statsample::SPLIT_TOKEN)
44
+ tf.close
45
+ tf.open
46
+ h = YAML.load(tf)
47
+ assert_equal([:v1], h.keys)
48
+ assert_equal(expected_keys_v1, h[:v1].keys.sort)
49
+ tf.close(true)
50
+ end
51
+
52
+ def test_recodification
53
+ expected = [['r'], %w(w r), ['w'], ['r'], ['s'], %w(s d), %w(s d)]
54
+ assert_equal(expected, Statsample::Codification.recode_vector(@ds[:v1], @dict))
55
+ v2 = Daru::Vector.new(['run', 'walk,dreaming', nil, 'walk,dream,dreaming,walking'])
56
+ expected = [['r'], %w(w d), nil, %w(w d)]
57
+ assert_equal(expected, Statsample::Codification.recode_vector(v2, @dict))
58
+ end
59
+
60
+ def test_recode_dataset_simple
61
+ Statsample::Codification.recode_dataset_simple!(@ds, :v1 => @dict)
62
+ expected_vector = Daru::Vector.new(['r', 'w,r', 'w', 'r', 's', 's,d', 's,d'])
63
+ assert_not_equal(expected_vector, @ds[:v1])
64
+ assert_equal(expected_vector, @ds[:v1_recoded])
65
+ end
66
+
67
+ def test_recode_dataset_split
68
+ Statsample::Codification.recode_dataset_split!(@ds, :v1 => @dict)
69
+ e = {}
70
+ e['r'] = Daru::Vector.new([1, 1, 0, 1, 0, 0, 0])
71
+ e['w'] = Daru::Vector.new([0, 1, 1, 0, 0, 0, 0])
72
+ e['s'] = Daru::Vector.new([0, 0, 0, 0, 1, 1, 1])
73
+ e['d'] = Daru::Vector.new([0, 0, 0, 0, 0, 1, 1])
74
+ e.each { |k, expected|
75
+ assert_equal(expected, @ds[('v1_' + k).to_sym], "Error on key #{k}")
76
+ }
77
+ end
78
+ end
@@ -0,0 +1,67 @@
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
+ class StatsampleCrosstabTestCase < Minitest::Test
3
+ def initialize(*args)
4
+ @v1 =Daru::Vector.new( %w(black blonde black black red black brown black blonde black red black blonde))
5
+ @v2 =Daru::Vector.new( %w(woman man man woman man man man woman man woman woman man man))
6
+ @ct = Statsample::Crosstab.new(@v1, @v2)
7
+ super
8
+ end
9
+
10
+ def test_crosstab_errors
11
+ e1 = %w(black blonde black black red black brown black blonde black)
12
+ assert_raise ArgumentError do
13
+ Statsample::Crosstab.new(e1, @v2)
14
+ end
15
+ e2 = Daru::Vector.new(%w(black blonde black black red black brown black blonde black black))
16
+
17
+ assert_raise ArgumentError do
18
+ Statsample::Crosstab.new(e2, @v2)
19
+ end
20
+ assert_nothing_raised do
21
+ Statsample::Crosstab.new(@v1, @v2)
22
+ end
23
+ end
24
+
25
+ def test_crosstab_basic
26
+ assert_equal(Daru::Vector.new(%w(black blonde brown red)), @ct.rows_names)
27
+ assert_equal(Daru::Vector.new(%w(man woman)), @ct.cols_names)
28
+ assert_equal({ 'black' => 7, 'blonde' => 3, 'red' => 2, 'brown' => 1 }, @ct.rows_total)
29
+ assert_equal({ 'man' => 8, 'woman' => 5 }, @ct.cols_total)
30
+ end
31
+
32
+ def test_crosstab_frequencies
33
+ fq = @ct.frequencies
34
+ assert_equal(8, fq.size)
35
+ sum = fq.inject(0) { |s, x| s + x[1] }
36
+ assert_equal(13, sum)
37
+ fr = @ct.frequencies_by_row
38
+ assert_equal(4, fr.size)
39
+ assert_equal(%w(black blonde brown red), fr.keys.sort)
40
+ fc = @ct.frequencies_by_col
41
+ assert_equal(2, fc.size)
42
+ assert_equal(%w(man woman), fc.keys.sort)
43
+ assert_equal(Matrix.rows([[3, 4], [3, 0], [1, 0], [1, 1]]), @ct.to_matrix)
44
+ end
45
+
46
+ def test_summary
47
+ @ct.percentage_row = true
48
+ @ct.percentage_column = true
49
+ @ct.percentage_total = true
50
+ assert(@ct.summary.size > 0)
51
+ end
52
+
53
+ def test_expected
54
+ v1 = Daru::Vector.new(%w(1 1 1 1 1 0 0 0 0 0))
55
+ v2 = Daru::Vector.new(%w(0 0 0 0 0 1 1 1 1 1))
56
+ ct = Statsample::Crosstab.new(v1, v2)
57
+ assert_equal(Matrix[[2.5, 2.5], [2.5, 2.5]], ct.matrix_expected)
58
+ end
59
+
60
+ def test_crosstab_with_scale
61
+ v1 = Daru::Vector.new(%w(1 1 1 1 1 0 0 0 0 0))
62
+ v2 = Daru::Vector.new(%w(0 0 0 0 0 1 1 1 1 1))
63
+ ct = Statsample::Crosstab.new(v1, v2)
64
+ assert_equal(Matrix[[0, 5], [5, 0]], ct.to_matrix)
65
+ assert_nothing_raised { ct.summary }
66
+ end
67
+ end
@@ -0,0 +1,39 @@
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
+ class StatsampleDominanceAnalysisTestCase < Minitest::Test
3
+ def test_dominance_univariate
4
+ # Example from Budescu (1993)
5
+ m = Matrix[[1, 0.683, 0.154, 0.460, 0.618], [0.683, 1, -0.050, 0.297, 0.461], [0.154, -0.050, 1, 0.006, 0.262], [0.460, 0.297, 0.006, 1, 0.507], [0.618, 0.461, 0.262, 0.507, 1]]
6
+ m.extend Statsample::CovariateMatrix
7
+ m.fields = %w(x1 x2 x3 x4 y)
8
+ da = Statsample::DominanceAnalysis.new(m, 'y')
9
+
10
+ contr_x1 = { 'x2' => 0.003, 'x3' => 0.028, 'x4' => 0.063 }
11
+ contr_x1.each do |k, v|
12
+ assert_in_delta(v, da.models_data[['x1']].contributions[k], 0.001)
13
+ end
14
+ assert_in_delta(0.052, da.models_data[%w(x2 x3 x4)].contributions['x1'], 0.001)
15
+ expected_dominances = [1, 1, 0.5, 0.5, 0, 0]
16
+ expected_g_dominances = [1, 1, 1, 1, 0, 0]
17
+
18
+ da.pairs.each_with_index do |a, i|
19
+ assert_equal(expected_dominances[i], da.total_dominance_pairwise(a[0], a[1]))
20
+ assert_equal(expected_dominances[i], da.conditional_dominance_pairwise(a[0], a[1]))
21
+ assert_equal(expected_g_dominances[i], da.general_dominance_pairwise(a[0], a[1]))
22
+ end
23
+ assert(da.summary.size > 0)
24
+ end
25
+
26
+ def test_dominance_multivariate
27
+ m = Matrix[[1.0, -0.19, -0.358, -0.343, 0.359, 0.257], [-0.19, 1.0, 0.26, 0.29, -0.11, -0.11], [-0.358, 0.26, 1.0, 0.54, -0.49, -0.23], [-0.343, 0.29, 0.54, 1.0, -0.22, -0.41], [0.359, -0.11, -0.49, -0.22, 1.0, 0.62], [0.257, -0.11, -0.23, -0.41, 0.62, 1]]
28
+ m.extend Statsample::CovariateMatrix
29
+ m.fields = %w(y1 y2 x1 x2 x3 x4)
30
+ m2 = m.submatrix(%w(y1 x1 x2 x3 x4))
31
+
32
+ da = Statsample::DominanceAnalysis.new(m, %w(y1 y2), cases: 683, method_association: :p2yx)
33
+
34
+ contr_x1 = { 'x2' => 0.027, 'x3' => 0.024, 'x4' => 0.017 }
35
+ contr_x1.each do |k, v|
36
+ assert_in_delta(v, da.models_data[['x1']].contributions[k], 0.003)
37
+ end
38
+ end
39
+ end
@@ -0,0 +1,228 @@
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
+ # require 'rserve'
3
+ # require 'statsample/rserve_extension'
4
+
5
+ class StatsampleFactorTestCase < Minitest::Test
6
+ include Statsample::Fixtures
7
+ # Based on Hardle and Simar
8
+ def setup
9
+ @fixtures_dir = File.expand_path(File.dirname(__FILE__) + '/fixtures')
10
+ end
11
+
12
+ # Based on Hardle example
13
+ def test_covariance_matrix
14
+ ds = Daru::DataFrame.from_plaintext(@fixtures_dir + '/bank2.dat', [:v1,:v2,:v3,:v4,:v5,:v6])
15
+ ds.vectors.each {|f|
16
+ ds[f] = ds[f].center
17
+ }
18
+ cm = Statsample::Bivariate.covariance_matrix ds
19
+ pca = Statsample::Factor::PCA.new(cm, m: 6)
20
+ # puts pca.summary
21
+ # puts pca.feature_matrix
22
+ exp_eig = Daru::Vector.new([2.985, 0.931, 0.242, 0.194, 0.085, 0.035])
23
+ assert_similar_vector(exp_eig, Daru::Vector.new(pca.eigenvalues), 0.1)
24
+ pcs = pca.principal_components(ds)
25
+ k = 6
26
+ comp_matrix = pca.component_matrix
27
+ k.times {|i|
28
+ pc_id = "PC_#{i + 1}".to_sym
29
+ k.times {|j| # variable
30
+ ds_id = "v#{j + 1}".to_sym
31
+ r = Statsample::Bivariate.correlation(ds[ds_id], pcs[pc_id])
32
+ assert_in_delta(r, comp_matrix[j, i])
33
+ }
34
+ }
35
+ end
36
+
37
+ def test_principalcomponents_ruby_gsl
38
+ if Statsample.has_gsl?
39
+ ran = Distribution::Normal.rng
40
+
41
+ # @r=::Rserve::Connection.new
42
+
43
+ samples = 20
44
+ [3, 5, 7].each {|k|
45
+ v = {}
46
+ v[:x0] = Daru::Vector.new(samples.times.map { ran.call }).center
47
+ (1...k).each { |i|
48
+ v["x#{i}".to_sym] = Daru::Vector.new(samples.times.map { |ii| ran.call * 0.5 + v["x#{i - 1}".to_sym][ii] * 0.5 }).center
49
+ }
50
+
51
+ ds = Daru::DataFrame.new(v)
52
+ cm = Statsample::Bivariate.covariance_matrix ds
53
+ # @r.assign('ds',ds)
54
+ # @r.eval('cm<-cor(ds);sm<-eigen(cm, sym=TRUE);v<-sm$vectors')
55
+ # puts "eigenvalues"
56
+ # puts @r.eval('v').to_ruby.to_s
57
+ pca_ruby = Statsample::Factor::PCA.new(cm, m: k, use_gsl: false)
58
+ pca_gsl = Statsample::Factor::PCA.new(cm, m: k, use_gsl: true)
59
+ pc_ruby = pca_ruby.principal_components(ds)
60
+ pc_gsl = pca_gsl.principal_components(ds)
61
+ # Test component matrix correlation!
62
+ cm_ruby = pca_ruby.component_matrix
63
+ # puts cm_ruby.summary
64
+ k.times {|i|
65
+ pc_id = "PC_#{i + 1}".to_sym
66
+ assert_in_delta(pca_ruby.eigenvalues[i], pca_gsl.eigenvalues[i], 1e-10)
67
+ # Revert gsl component values
68
+ pc_gsl_data = (pc_gsl[pc_id][0] - pc_ruby[pc_id][0]).abs > 1e-6 ? pc_gsl[pc_id].recode(&:-@) : pc_gsl[pc_id]
69
+ assert_similar_vector(pc_gsl_data, pc_ruby[pc_id], 1e-6, "PC for #{k} variables")
70
+ if false
71
+ k.times {|j| # variable
72
+ ds_id = "x#{j}".to_sym
73
+ r = Statsample::Bivariate.correlation(ds[ds_id], pc_ruby[pc_id])
74
+ puts "#{pc_id}-#{ds_id}:#{r}"
75
+ }
76
+ end
77
+ }
78
+ }
79
+ end
80
+ # @r.close
81
+ end
82
+
83
+ def test_principalcomponents
84
+ if Statsample.has_gsl?
85
+ principalcomponents(true)
86
+ else
87
+ skip "Require GSL"
88
+ end
89
+ principalcomponents(false)
90
+ end
91
+
92
+ def principalcomponents(gsl)
93
+ ran = Distribution::Normal.rng
94
+ samples = 50
95
+ x1 = Daru::Vector.new(samples.times.map { ran.call })
96
+ x2 = Daru::Vector.new(samples.times.map { |i| ran.call * 0.5 + x1[i] * 0.5 })
97
+ ds = Daru::DataFrame.new({ :x1 => x1, :x2 => x2 })
98
+
99
+ cm = Statsample::Bivariate.correlation_matrix ds
100
+ r = cm[0, 1]
101
+ pca = Statsample::Factor::PCA.new(cm, m: 2, use_gsl: gsl)
102
+ assert_in_delta(1 + r, pca.eigenvalues[0], 1e-10)
103
+ assert_in_delta(1 - r, pca.eigenvalues[1], 1e-10)
104
+ hs = 1.0 / Math.sqrt(2)
105
+ assert_equal_vector(Vector[1, 1] * hs, pca.eigenvectors[0])
106
+ m_1 = gsl ? Vector[-1, 1] : Vector[1, -1]
107
+
108
+ assert_equal_vector(hs * m_1, pca.eigenvectors[1])
109
+
110
+ pcs = pca.principal_components(ds)
111
+ exp_pc_1 = ds.collect_row_with_index {|row, _i|
112
+ hs * (row[:x1] + row[:x2])
113
+ }
114
+ exp_pc_2 = ds.collect_row_with_index {|row, _i|
115
+ gsl ? hs * (row[:x2] - row[:x1]) : hs * (row[:x1] - row[:x2])
116
+ }
117
+ assert_similar_vector(exp_pc_1, pcs[:PC_1])
118
+ assert_similar_vector(exp_pc_2, pcs[:PC_2])
119
+ end
120
+
121
+ def test_antiimage
122
+ cor = Matrix[[1, 0.964, 0.312], [0.964, 1, 0.411], [0.312, 0.411, 1]]
123
+ expected = Matrix[[0.062, -0.057, 0.074], [-0.057, 0.057, -0.089], [0.074, -0.089, 0.729]]
124
+ ai = Statsample::Factor.anti_image_covariance_matrix(cor)
125
+ assert(Matrix.equal_in_delta?(expected, ai, 0.01), "#{expected} not equal to #{ai}")
126
+ end
127
+
128
+ def test_kmo
129
+ @v1 = Daru::Vector.new([1, 2, 3, 4, 7, 8, 9, 10, 14, 15, 20, 50, 60, 70])
130
+ @v2 = Daru::Vector.new([5, 6, 11, 12, 13, 16, 17, 18, 19, 20, 30, 0, 0, 0])
131
+ @v3 = Daru::Vector.new([10, 3, 20, 30, 40, 50, 80, 10, 20, 30, 40, 2, 3, 4])
132
+ # KMO: 0.490 -- NOTE(review): the assertion below expects 0.667; confirm which value is intended
133
+ ds = Daru::DataFrame.new({ :v1 => @v1, :v2 => @v2, :v3 => @v3 })
134
+ cor = Statsample::Bivariate.correlation_matrix(ds)
135
+ kmo = Statsample::Factor.kmo(cor)
136
+ assert_in_delta(0.667, kmo, 0.001)
137
+ assert_in_delta(0.81, Statsample::Factor.kmo(harman_817), 0.01)
138
+ end
139
+
140
+ def test_kmo_univariate
141
+ m = harman_817
142
+ expected = [0.73, 0.76, 0.84, 0.87, 0.53, 0.93, 0.78, 0.86]
143
+ m.row_size.times.map {|i|
144
+ assert_in_delta(expected[i], Statsample::Factor.kmo_univariate(m, i), 0.01)
145
+ }
146
+ end
147
+ # Tested with SPSS and R
148
+ def test_pca
149
+ dtype = Statsample.has_gsl? ? :gsl : :array
150
+ a = Daru::Vector.new([2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1], dtype: dtype)
151
+ b = Daru::Vector.new([2.4, 0.7, 2.9, 2.2, 3.0, 2.7, 1.6, 1.1, 1.6, 0.9], dtype: dtype)
152
+ a = a - a.mean
153
+ b = b - b.mean
154
+ ds = Daru::DataFrame.new({ :a => a, :b => b })
155
+
156
+ cov_matrix = Statsample::Bivariate.covariance_matrix(ds)
157
+ if Statsample.has_gsl?
158
+ pca = Statsample::Factor::PCA.new(cov_matrix, use_gsl: true)
159
+ pca_set(pca, 'gsl')
160
+ else
161
+ skip('Eigenvalues could be calculated with GSL (requires gsl)')
162
+ end
163
+ pca = Statsample::Factor::PCA.new(cov_matrix, use_gsl: false)
164
+ pca_set(pca, 'ruby')
165
+ end
166
+
167
+ def pca_set(pca, _type)
168
+ expected_eigenvalues = [1.284, 0.0490]
169
+ expected_eigenvalues.each_with_index{|ev, i|
170
+ assert_in_delta(ev, pca.eigenvalues[i], 0.001)
171
+ }
172
+ expected_communality = [0.590, 0.694]
173
+ expected_communality.each_with_index{|ev, i|
174
+ assert_in_delta(ev, pca.communalities[i], 0.001)
175
+ }
176
+ expected_cm = [0.768, 0.833]
177
+ obs = pca.component_matrix_correlation(1).column(0).to_a
178
+ expected_cm.each_with_index{|ev, i|
179
+ assert_in_delta(ev, obs[i], 0.001)
180
+ }
181
+
182
+ assert(pca.summary)
183
+ end
184
+
185
+ # Tested with R
186
+ def test_principalaxis
187
+ matrix = ::Matrix[
188
+ [1.0, 0.709501601093587, 0.877596585880047, 0.272219316266807], [0.709501601093587, 1.0, 0.291633797330304, 0.871141831433844], [0.877596585880047, 0.291633797330304, 1.0, -0.213373722977167], [0.272219316266807, 0.871141831433844, -0.213373722977167, 1.0]]
189
+
190
+ fa = Statsample::Factor::PrincipalAxis.new(matrix, m: 1, max_iterations: 50)
191
+
192
+ cm = ::Matrix[[0.923], [0.912], [0.507], [0.483]]
193
+
194
+ assert_equal_matrix(cm, fa.component_matrix, 0.001)
195
+
196
+ h2 = [0.852, 0.832, 0.257, 0.233]
197
+ h2.each_with_index{|ev, i|
198
+ assert_in_delta(ev, fa.communalities[i], 0.001)
199
+ }
200
+ eigen1 = 2.175
201
+ assert_in_delta(eigen1, fa.eigenvalues[0], 0.001)
202
+ assert(fa.summary.size > 0)
203
+ fa = Statsample::Factor::PrincipalAxis.new(matrix, smc: false)
204
+
205
+ assert_raise RuntimeError do
206
+ fa.iterate
207
+ end
208
+ end
209
+
210
+ def test_rotation_varimax
211
+ a = Matrix[[0.4320, 0.8129, 0.3872],
212
+ [0.7950, -0.5416, 0.2565],
213
+ [0.5944, 0.7234, -0.3441],
214
+ [0.8945, -0.3921, -0.1863]]
215
+
216
+ expected = Matrix[[-0.0204423, 0.938674, -0.340334],
217
+ [0.983662, 0.0730206, 0.134997],
218
+ [0.0826106, 0.435975, -0.893379],
219
+ [0.939901, -0.0965213, -0.309596]]
220
+ varimax = Statsample::Factor::Varimax.new(a)
221
+ assert(!varimax.rotated.nil?, "Rotated shouldn't be empty")
222
+ assert(!varimax.component_transformation_matrix.nil?, "Component matrix shouldn't be empty")
223
+ assert(!varimax.h2.nil?, "H2 shouldn't be empty")
224
+
225
+ assert_equal_matrix(expected, varimax.rotated, 1e-6)
226
+ assert(varimax.summary.size > 0)
227
+ end
228
+ end