statsample-ekatena 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.travis.yml +23 -0
  4. data/CONTRIBUTING.md +17 -0
  5. data/Gemfile +2 -0
  6. data/History.txt +457 -0
  7. data/LICENSE.txt +12 -0
  8. data/README.md +175 -0
  9. data/Rakefile +44 -0
  10. data/benchmarks/correlation_matrix_15_variables.rb +32 -0
  11. data/benchmarks/correlation_matrix_5_variables.rb +33 -0
  12. data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
  13. data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
  14. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +71 -0
  15. data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
  16. data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
  17. data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
  18. data/benchmarks/correlation_matrix_methods/results.ds +0 -0
  19. data/benchmarks/factor_map.rb +37 -0
  20. data/benchmarks/helpers_benchmark.rb +5 -0
  21. data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
  22. data/doc_latex/manual/equations.tex +78 -0
  23. data/examples/boxplot.rb +28 -0
  24. data/examples/chisquare_test.rb +23 -0
  25. data/examples/correlation_matrix.rb +32 -0
  26. data/examples/dataset.rb +30 -0
  27. data/examples/dominance_analysis.rb +33 -0
  28. data/examples/dominance_analysis_bootstrap.rb +32 -0
  29. data/examples/histogram.rb +26 -0
  30. data/examples/icc.rb +24 -0
  31. data/examples/levene.rb +29 -0
  32. data/examples/multiple_regression.rb +20 -0
  33. data/examples/multivariate_correlation.rb +33 -0
  34. data/examples/parallel_analysis.rb +40 -0
  35. data/examples/polychoric.rb +40 -0
  36. data/examples/principal_axis.rb +26 -0
  37. data/examples/reliability.rb +31 -0
  38. data/examples/scatterplot.rb +25 -0
  39. data/examples/t_test.rb +27 -0
  40. data/examples/tetrachoric.rb +17 -0
  41. data/examples/u_test.rb +24 -0
  42. data/examples/vector.rb +20 -0
  43. data/examples/velicer_map_test.rb +46 -0
  44. data/grab_references.rb +29 -0
  45. data/lib/spss.rb +134 -0
  46. data/lib/statsample-ekatena/analysis.rb +100 -0
  47. data/lib/statsample-ekatena/analysis/suite.rb +89 -0
  48. data/lib/statsample-ekatena/analysis/suitereportbuilder.rb +44 -0
  49. data/lib/statsample-ekatena/anova.rb +24 -0
  50. data/lib/statsample-ekatena/anova/contrast.rb +79 -0
  51. data/lib/statsample-ekatena/anova/oneway.rb +187 -0
  52. data/lib/statsample-ekatena/anova/twoway.rb +207 -0
  53. data/lib/statsample-ekatena/bivariate.rb +406 -0
  54. data/lib/statsample-ekatena/bivariate/pearson.rb +54 -0
  55. data/lib/statsample-ekatena/codification.rb +182 -0
  56. data/lib/statsample-ekatena/converter/csv.rb +28 -0
  57. data/lib/statsample-ekatena/converter/spss.rb +48 -0
  58. data/lib/statsample-ekatena/converters.rb +211 -0
  59. data/lib/statsample-ekatena/crosstab.rb +188 -0
  60. data/lib/statsample-ekatena/daru.rb +115 -0
  61. data/lib/statsample-ekatena/dataset.rb +10 -0
  62. data/lib/statsample-ekatena/dominanceanalysis.rb +425 -0
  63. data/lib/statsample-ekatena/dominanceanalysis/bootstrap.rb +232 -0
  64. data/lib/statsample-ekatena/factor.rb +104 -0
  65. data/lib/statsample-ekatena/factor/map.rb +124 -0
  66. data/lib/statsample-ekatena/factor/parallelanalysis.rb +166 -0
  67. data/lib/statsample-ekatena/factor/pca.rb +242 -0
  68. data/lib/statsample-ekatena/factor/principalaxis.rb +243 -0
  69. data/lib/statsample-ekatena/factor/rotation.rb +198 -0
  70. data/lib/statsample-ekatena/formula/fit_model.rb +46 -0
  71. data/lib/statsample-ekatena/formula/formula.rb +306 -0
  72. data/lib/statsample-ekatena/graph.rb +11 -0
  73. data/lib/statsample-ekatena/graph/boxplot.rb +236 -0
  74. data/lib/statsample-ekatena/graph/histogram.rb +198 -0
  75. data/lib/statsample-ekatena/graph/scatterplot.rb +213 -0
  76. data/lib/statsample-ekatena/histogram.rb +180 -0
  77. data/lib/statsample-ekatena/matrix.rb +329 -0
  78. data/lib/statsample-ekatena/multiset.rb +310 -0
  79. data/lib/statsample-ekatena/regression.rb +65 -0
  80. data/lib/statsample-ekatena/regression/multiple.rb +89 -0
  81. data/lib/statsample-ekatena/regression/multiple/alglibengine.rb +128 -0
  82. data/lib/statsample-ekatena/regression/multiple/baseengine.rb +251 -0
  83. data/lib/statsample-ekatena/regression/multiple/gslengine.rb +129 -0
  84. data/lib/statsample-ekatena/regression/multiple/matrixengine.rb +205 -0
  85. data/lib/statsample-ekatena/regression/multiple/rubyengine.rb +86 -0
  86. data/lib/statsample-ekatena/regression/simple.rb +121 -0
  87. data/lib/statsample-ekatena/reliability.rb +150 -0
  88. data/lib/statsample-ekatena/reliability/icc.rb +415 -0
  89. data/lib/statsample-ekatena/reliability/multiscaleanalysis.rb +181 -0
  90. data/lib/statsample-ekatena/reliability/scaleanalysis.rb +233 -0
  91. data/lib/statsample-ekatena/reliability/skillscaleanalysis.rb +114 -0
  92. data/lib/statsample-ekatena/resample.rb +15 -0
  93. data/lib/statsample-ekatena/shorthand.rb +125 -0
  94. data/lib/statsample-ekatena/srs.rb +169 -0
  95. data/lib/statsample-ekatena/test.rb +82 -0
  96. data/lib/statsample-ekatena/test/bartlettsphericity.rb +45 -0
  97. data/lib/statsample-ekatena/test/chisquare.rb +73 -0
  98. data/lib/statsample-ekatena/test/f.rb +52 -0
  99. data/lib/statsample-ekatena/test/kolmogorovsmirnov.rb +63 -0
  100. data/lib/statsample-ekatena/test/levene.rb +88 -0
  101. data/lib/statsample-ekatena/test/t.rb +309 -0
  102. data/lib/statsample-ekatena/test/umannwhitney.rb +208 -0
  103. data/lib/statsample-ekatena/test/wilcoxonsignedrank.rb +90 -0
  104. data/lib/statsample-ekatena/vector.rb +19 -0
  105. data/lib/statsample-ekatena/version.rb +3 -0
  106. data/lib/statsample.rb +282 -0
  107. data/po/es/statsample.mo +0 -0
  108. data/po/es/statsample.po +959 -0
  109. data/po/statsample.pot +947 -0
  110. data/references.txt +24 -0
  111. data/statsample-ekatena.gemspec +49 -0
  112. data/test/fixtures/bank2.dat +200 -0
  113. data/test/fixtures/correlation_matrix.rb +17 -0
  114. data/test/fixtures/df.csv +15 -0
  115. data/test/fixtures/hartman_23.matrix +9 -0
  116. data/test/fixtures/stock_data.csv +500 -0
  117. data/test/fixtures/tetmat_matrix.txt +5 -0
  118. data/test/fixtures/tetmat_test.txt +1001 -0
  119. data/test/helpers_tests.rb +83 -0
  120. data/test/test_analysis.rb +176 -0
  121. data/test/test_anova_contrast.rb +36 -0
  122. data/test/test_anovaoneway.rb +26 -0
  123. data/test/test_anovatwoway.rb +37 -0
  124. data/test/test_anovatwowaywithdataset.rb +47 -0
  125. data/test/test_anovawithvectors.rb +102 -0
  126. data/test/test_awesome_print_bug.rb +16 -0
  127. data/test/test_bartlettsphericity.rb +25 -0
  128. data/test/test_bivariate.rb +164 -0
  129. data/test/test_codification.rb +78 -0
  130. data/test/test_crosstab.rb +67 -0
  131. data/test/test_dominance_analysis.rb +39 -0
  132. data/test/test_factor.rb +228 -0
  133. data/test/test_factor_map.rb +38 -0
  134. data/test/test_factor_pa.rb +56 -0
  135. data/test/test_fit_model.rb +88 -0
  136. data/test/test_ggobi.rb +35 -0
  137. data/test/test_gsl.rb +15 -0
  138. data/test/test_histogram.rb +109 -0
  139. data/test/test_matrix.rb +48 -0
  140. data/test/test_multiset.rb +176 -0
  141. data/test/test_regression.rb +231 -0
  142. data/test/test_reliability.rb +223 -0
  143. data/test/test_reliability_icc.rb +198 -0
  144. data/test/test_reliability_skillscale.rb +57 -0
  145. data/test/test_resample.rb +24 -0
  146. data/test/test_srs.rb +9 -0
  147. data/test/test_statistics.rb +69 -0
  148. data/test/test_stest.rb +69 -0
  149. data/test/test_stratified.rb +17 -0
  150. data/test/test_test_f.rb +33 -0
  151. data/test/test_test_kolmogorovsmirnov.rb +34 -0
  152. data/test/test_test_t.rb +62 -0
  153. data/test/test_umannwhitney.rb +27 -0
  154. data/test/test_vector.rb +12 -0
  155. data/test/test_wilcoxonsignedrank.rb +64 -0
  156. metadata +570 -0
@@ -0,0 +1,16 @@
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
+ class StatsampleAwesomePrintBug < Minitest::Test
3
+ context('Awesome Print integration') do
4
+ setup do
5
+ require 'awesome_print'
6
+ end
7
+ should 'should be flawless' do
8
+ a = Daru::Vector.new([1, 2, 3])
9
+
10
+ assert(a != [1, 2, 3])
11
+ assert_nothing_raised do
12
+ ap a
13
+ end
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,25 @@
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
+
3
+ class StatsampleBartlettSphericityTestCase < Minitest::Test
4
+ include Statsample::Test
5
+ context Statsample::Test::BartlettSphericity do
6
+ setup do
7
+ @v1 = Daru::Vector.new([1, 2, 3, 4, 7, 8, 9, 10, 14, 15, 20, 50, 60, 70])
8
+ @v2 = Daru::Vector.new([5, 6, 11, 12, 13, 16, 17, 18, 19, 20, 30, 0, 0, 0])
9
+ @v3 = Daru::Vector.new([10, 3, 20, 30, 40, 50, 80, 10, 20, 30, 40, 2, 3, 4])
10
+ # KMO: 0.490
11
+ ds = Daru::DataFrame.new({ :v1 => @v1, :v2 => @v2, :v3 => @v3 })
12
+ cor = Statsample::Bivariate.correlation_matrix(ds)
13
+ @bs = Statsample::Test::BartlettSphericity.new(cor, 14)
14
+ end
15
+ should 'have correct value for chi' do
16
+ assert_in_delta(9.477, @bs.value, 0.001)
17
+ end
18
+ should 'have correct value for df' do
19
+ assert_equal(3, @bs.df)
20
+ end
21
+ should 'have correct value for probability' do
22
+ assert_in_delta(0.024, @bs.probability, 0.001)
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,164 @@
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
+ class StatsampleBivariateTestCase < Minitest::Test
3
+ should 'method sum of squares should be correct' do
4
+ v1 = Daru::Vector.new([1, 2, 3, 4, 5, 6])
5
+ v2 = Daru::Vector.new([6, 2, 4, 10, 12, 8])
6
+ assert_equal(23.0, Statsample::Bivariate.sum_of_squares(v1, v2))
7
+ end
8
+ should_with_gsl 'return same covariance with ruby and gls implementation' do
9
+ v1 = Daru::Vector.new(20.times.collect { |_a| rand })
10
+ v2 = Daru::Vector.new(20.times.collect { |_a| rand })
11
+ assert_in_delta(Statsample::Bivariate.covariance(v1, v2), Statsample::Bivariate.covariance_slow(v1, v2), 0.001)
12
+ end
13
+
14
+ should_with_gsl 'return same correlation with ruby and gls implementation' do
15
+ v1 = Daru::Vector.new(20.times.collect { |_a| rand })
16
+ v2 = Daru::Vector.new(20.times.collect { |_a| rand })
17
+
18
+ assert_in_delta(GSL::Stats.correlation(v1.to_gsl, v2.to_gsl), Statsample::Bivariate.pearson_slow(v1, v2), 1e-10)
19
+ end
20
+ should 'return correct pearson correlation' do
21
+ v1 = Daru::Vector.new([6, 5, 4, 7, 8, 4, 3, 2])
22
+ v2 = Daru::Vector.new([2, 3, 7, 8, 6, 4, 3, 2])
23
+ assert_in_delta(0.525, Statsample::Bivariate.pearson(v1, v2), 0.001)
24
+ assert_in_delta(0.525, Statsample::Bivariate.pearson_slow(v1, v2), 0.001)
25
+
26
+ v3 = Daru::Vector.new([6, 2, 1000, 1000, 5, 4, 7, 8, 4, 3, 2, nil])
27
+ v4 = Daru::Vector.new([2, nil, nil, nil, 3, 7, 8, 6, 4, 3, 2, 500])
28
+ assert_in_delta(0.525, Statsample::Bivariate.pearson(v3, v4), 0.001)
29
+ # Test ruby method
30
+ v3a, v4a = Statsample.only_valid v3, v4
31
+ assert_in_delta(0.525, Statsample::Bivariate.pearson_slow(v3a, v4a), 0.001)
32
+ end
33
+ should 'return correct values for t_pearson and prop_pearson' do
34
+ v1 = Daru::Vector.new([6, 5, 4, 7, 8, 4, 3, 2])
35
+ v2 = Daru::Vector.new([2, 3, 7, 8, 6, 4, 3, 2])
36
+ r = Statsample::Bivariate::Pearson.new(v1, v2)
37
+ assert_in_delta(0.525, r.r, 0.001)
38
+ assert_in_delta(Statsample::Bivariate.t_pearson(v1, v2), r.t, 0.001)
39
+ assert_in_delta(Statsample::Bivariate.prop_pearson(r.t, 8, :both), r.probability, 0.001)
40
+ assert(r.summary.size > 0)
41
+ end
42
+ should 'return correct correlation_matrix with nils values' do
43
+ v1 = Daru::Vector.new([6, 5, 4, 7, 8, 4, 3, 2])
44
+ v2 = Daru::Vector.new([2, 3, 7, 8, 6, 4, 3, 2])
45
+ v3 = Daru::Vector.new([6, 2, 1000, 1000, 5, 4, 7, 8])
46
+ v4 = Daru::Vector.new([2, nil, nil, nil, 3, 7, 8, 6])
47
+ ds = Daru::DataFrame.new({ :v1 => v1, :v2 => v2, :v3 => v3, :v4 => v4 })
48
+ c = proc { |n1, n2| Statsample::Bivariate.pearson(n1, n2) }
49
+ expected = Matrix[[c.call(v1, v1), c.call(v1, v2), c.call(v1, v3), c.call(v1, v4)], [c.call(v2, v1), c.call(v2, v2), c.call(v2, v3), c.call(v2, v4)], [c.call(v3, v1), c.call(v3, v2), c.call(v3, v3), c.call(v3, v4)],
50
+ [c.call(v4, v1), c.call(v4, v2), c.call(v4, v3), c.call(v4, v4)]
51
+ ]
52
+ obt = Statsample::Bivariate.correlation_matrix(ds)
53
+ for i in 0...expected.row_size
54
+ for j in 0...expected.column_size
55
+ # puts expected[i,j].inspect
56
+ # puts obt[i,j].inspect
57
+ assert_in_delta(expected[i, j], obt[i, j], 0.0001, "#{expected[i, j].class}!=#{obt[i, j].class} ")
58
+ end
59
+ end
60
+ # assert_equal(expected,obt)
61
+ end
62
+ should_with_gsl 'return same values for optimized and pairwise covariance matrix' do
63
+ cases = 100
64
+ v1 = Daru::Vector.new_with_size(cases) { rand }
65
+ v2 = Daru::Vector.new_with_size(cases) { rand }
66
+ v3 = Daru::Vector.new_with_size(cases) { rand }
67
+ v4 = Daru::Vector.new_with_size(cases) { rand }
68
+ v5 = Daru::Vector.new_with_size(cases) { rand }
69
+
70
+ ds = Daru::DataFrame.new({ :v1 => v1, :v2 => v2, :v3 => v3, :v4 => v4, :v5 => v5 })
71
+
72
+ cor_opt = Statsample::Bivariate.covariance_matrix_optimized(ds)
73
+
74
+ cor_pw = Statsample::Bivariate.covariance_matrix_pairwise(ds)
75
+ assert_equal_matrix(cor_opt, cor_pw, 1e-15)
76
+ end
77
+ should_with_gsl 'return same values for optimized and pairwise correlation matrix' do
78
+ cases = 100
79
+ v1 = Daru::Vector.new_with_size(cases) { rand }
80
+ v2 = Daru::Vector.new_with_size(cases) { rand }
81
+ v3 = Daru::Vector.new_with_size(cases) { rand }
82
+ v4 = Daru::Vector.new_with_size(cases) { rand }
83
+ v5 = Daru::Vector.new_with_size(cases) { rand }
84
+
85
+ ds = Daru::DataFrame.new({
86
+ :v1 => v1, :v2 => v2, :v3 => v3, :v4 => v4, :v5 => v5 })
87
+
88
+ cor_opt = Statsample::Bivariate.correlation_matrix_optimized(ds)
89
+
90
+ cor_pw = Statsample::Bivariate.correlation_matrix_pairwise(ds)
91
+ assert_equal_matrix(cor_opt, cor_pw, 1e-15)
92
+ end
93
+ should 'return correct correlation_matrix without nils values' do
94
+ v1 = Daru::Vector.new([6, 5, 4, 7, 8, 4, 3, 2])
95
+ v2 = Daru::Vector.new([2, 3, 7, 8, 6, 4, 3, 2])
96
+ v3 = Daru::Vector.new([6, 2, 1000, 1000, 5, 4, 7, 8])
97
+ v4 = Daru::Vector.new([2, 4, 6, 7, 3, 7, 8, 6])
98
+ ds = Daru::DataFrame.new({ :v1 => v1, :v2 => v2, :v3 => v3, :v4 => v4 })
99
+ c = proc { |n1, n2| Statsample::Bivariate.pearson(n1, n2) }
100
+ expected = Matrix[[c.call(v1, v1), c.call(v1, v2), c.call(v1, v3), c.call(v1, v4)], [c.call(v2, v1), c.call(v2, v2), c.call(v2, v3), c.call(v2, v4)], [c.call(v3, v1), c.call(v3, v2), c.call(v3, v3), c.call(v3, v4)],
101
+ [c.call(v4, v1), c.call(v4, v2), c.call(v4, v3), c.call(v4, v4)]
102
+ ]
103
+ obt = Statsample::Bivariate.correlation_matrix(ds)
104
+ for i in 0...expected.row_size
105
+ for j in 0...expected.column_size
106
+ # puts expected[i,j].inspect
107
+ # puts obt[i,j].inspect
108
+ assert_in_delta(expected[i, j], obt[i, j], 0.0001, "#{expected[i, j].class}!=#{obt[i, j].class} ")
109
+ end
110
+ end
111
+ # assert_equal(expected,obt)
112
+ end
113
+
114
+ should 'return correct value for prop pearson' do
115
+ assert_in_delta(0.42, Statsample::Bivariate.prop_pearson(Statsample::Bivariate.t_r(0.084, 94), 94), 0.01)
116
+ assert_in_delta(0.65, Statsample::Bivariate.prop_pearson(Statsample::Bivariate.t_r(0.046, 95), 95), 0.01)
117
+ r = 0.9
118
+ n = 100
119
+ t = Statsample::Bivariate.t_r(r, n)
120
+ assert(Statsample::Bivariate.prop_pearson(t, n, :both) < 0.05)
121
+ assert(Statsample::Bivariate.prop_pearson(t, n, :right) < 0.05)
122
+ assert(Statsample::Bivariate.prop_pearson(t, n, :left) > 0.05)
123
+
124
+ r = -0.9
125
+ n = 100
126
+ t = Statsample::Bivariate.t_r(r, n)
127
+ assert(Statsample::Bivariate.prop_pearson(t, n, :both) < 0.05)
128
+ assert(Statsample::Bivariate.prop_pearson(t, n, :right) > 0.05)
129
+ assert(Statsample::Bivariate.prop_pearson(t, n, :left) < 0.05)
130
+ end
131
+
132
+ should "return correct value for Spearman's rho" do
133
+ v1 =Daru::Vector.new( [86, 97, 99, 100, 101, 103, 106, 110, 112, 113])
134
+ v2 =Daru::Vector.new( [0, 20, 28, 27, 50, 29, 7, 17, 6, 12])
135
+ assert_in_delta(-0.175758, Statsample::Bivariate.spearman(v1, v2), 0.0001)
136
+ end
137
+ should 'return correct value for point_biserial correlation' do
138
+ c = Daru::Vector.new([1, 3, 5, 6, 7, 100, 200, 300, 400, 300])
139
+ d = Daru::Vector.new([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
140
+ assert_raises TypeError do
141
+ Statsample::Bivariate.point_biserial(c, d)
142
+ end
143
+ assert_in_delta(Statsample::Bivariate.point_biserial(d, c), Statsample::Bivariate.pearson(d, c), 0.0001)
144
+ end
145
+ should 'return correct value for tau_a and tau_b' do
146
+ v1 = Daru::Vector.new([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
147
+ v2 = Daru::Vector.new([1, 3, 4, 5, 7, 8, 2, 9, 10, 6, 11])
148
+ assert_in_delta(0.6727, Statsample::Bivariate.tau_a(v1, v2), 0.001)
149
+ assert_in_delta(0.6727, Statsample::Bivariate.tau_b((Statsample::Crosstab.new(v1, v2).to_matrix)), 0.001)
150
+ v1 = Daru::Vector.new([12, 14, 14, 17, 19, 19, 19, 19, 19, 20, 21, 21, 21, 21, 21, 22, 23, 24, 24, 24, 26, 26, 27])
151
+ v2 = Daru::Vector.new([11, 4, 4, 2, 0, 0, 0, 0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0])
152
+ assert_in_delta(-0.376201540231705, Statsample::Bivariate.tau_b(Statsample::Crosstab.new(v1, v2).to_matrix), 0.001)
153
+ end
154
+ should 'return correct value for gamma correlation' do
155
+ m = Matrix[[10, 5, 2], [10, 15, 20]]
156
+ assert_in_delta(0.636, Statsample::Bivariate.gamma(m), 0.001)
157
+ m2 = Matrix[[15, 12, 6, 5], [12, 8, 10, 8], [4, 6, 9, 10]]
158
+ assert_in_delta(0.349, Statsample::Bivariate.gamma(m2), 0.001)
159
+ end
160
+
161
+ should 'return correct residuals' do
162
+ # TODO: test Statsample::Bivariate.residuals
163
+ end
164
+ end
@@ -0,0 +1,78 @@
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
+ class StatsampleCodificationTestCase < Minitest::Test
3
+ def initialize(*args)
4
+ v1 = Daru::Vector.new(%w(run walk,run walking running sleep sleeping,dreaming sleep,dream))
5
+ @dict = { 'run' => 'r', 'walk' => 'w', 'walking' => 'w', 'running' => 'r', 'sleep' => 's', 'sleeping' => 's', 'dream' => 'd', 'dreaming' => 'd' }
6
+ @ds = Daru::DataFrame.new({ :v1 => v1 })
7
+ super
8
+ end
9
+
10
+ def test_create_hash
11
+ expected_keys_v1 = %w(run walk walking running sleep sleeping dream dreaming).sort
12
+ hash = Statsample::Codification.create_hash(@ds, [:v1])
13
+ assert_equal([:v1], hash.keys)
14
+ assert_equal(expected_keys_v1, hash[:v1].keys.sort)
15
+ assert_equal(expected_keys_v1, hash[:v1].values.sort)
16
+ end
17
+
18
+ def test_create_excel
19
+ filename = Dir.tmpdir + '/test_excel' + Time.now.to_s + '.xls'
20
+ # filename = Tempfile.new("test_codification_"+Time.now().to_s)
21
+ Statsample::Codification.create_excel(@ds, ['v1'], filename)
22
+ field = Daru::Vector.new(['v1'] * 8, name: :field)
23
+ keys = Daru::Vector.new(%w(dream dreaming run running sleep sleeping walk walking))
24
+ ds = Daru::DataFrame.from_excel(filename)
25
+ assert_equal(field, ds[:field])
26
+ assert_equal(keys, ds[:original])
27
+ assert_equal(keys, ds[:recoded])
28
+ hash = Statsample::Codification.excel_to_recoded_hash(filename)
29
+ assert_equal(keys.to_a, hash[:v1].keys.sort)
30
+ assert_equal(keys.to_a, hash[:v1].values.sort)
31
+ end
32
+
33
+ def test_create_yaml
34
+ assert_raise ArgumentError do
35
+ Statsample::Codification.create_yaml(@ds, [])
36
+ end
37
+ expected_keys_v1 = %w(run walk walking running sleep sleeping dream dreaming).sort
38
+ yaml_hash = Statsample::Codification.create_yaml(@ds, [:v1])
39
+ h = YAML.load(yaml_hash)
40
+ assert_equal([:v1], h.keys)
41
+ assert_equal(expected_keys_v1, h[:v1].keys.sort)
42
+ tf = Tempfile.new('test_codification')
43
+ yaml_hash = Statsample::Codification.create_yaml(@ds, [:v1], tf, Statsample::SPLIT_TOKEN)
44
+ tf.close
45
+ tf.open
46
+ h = YAML.load(tf)
47
+ assert_equal([:v1], h.keys)
48
+ assert_equal(expected_keys_v1, h[:v1].keys.sort)
49
+ tf.close(true)
50
+ end
51
+
52
+ def test_recodification
53
+ expected = [['r'], %w(w r), ['w'], ['r'], ['s'], %w(s d), %w(s d)]
54
+ assert_equal(expected, Statsample::Codification.recode_vector(@ds[:v1], @dict))
55
+ v2 = Daru::Vector.new(['run', 'walk,dreaming', nil, 'walk,dream,dreaming,walking'])
56
+ expected = [['r'], %w(w d), nil, %w(w d)]
57
+ assert_equal(expected, Statsample::Codification.recode_vector(v2, @dict))
58
+ end
59
+
60
+ def test_recode_dataset_simple
61
+ Statsample::Codification.recode_dataset_simple!(@ds, :v1 => @dict)
62
+ expected_vector = Daru::Vector.new(['r', 'w,r', 'w', 'r', 's', 's,d', 's,d'])
63
+ assert_not_equal(expected_vector, @ds[:v1])
64
+ assert_equal(expected_vector, @ds[:v1_recoded])
65
+ end
66
+
67
+ def test_recode_dataset_split
68
+ Statsample::Codification.recode_dataset_split!(@ds, :v1 => @dict)
69
+ e = {}
70
+ e['r'] = Daru::Vector.new([1, 1, 0, 1, 0, 0, 0])
71
+ e['w'] = Daru::Vector.new([0, 1, 1, 0, 0, 0, 0])
72
+ e['s'] = Daru::Vector.new([0, 0, 0, 0, 1, 1, 1])
73
+ e['d'] = Daru::Vector.new([0, 0, 0, 0, 0, 1, 1])
74
+ e.each { |k, expected|
75
+ assert_equal(expected, @ds[('v1_' + k).to_sym], "Error on key #{k}")
76
+ }
77
+ end
78
+ end
@@ -0,0 +1,67 @@
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
+ class StatsampleCrosstabTestCase < Minitest::Test
3
+ def initialize(*args)
4
+ @v1 =Daru::Vector.new( %w(black blonde black black red black brown black blonde black red black blonde))
5
+ @v2 =Daru::Vector.new( %w(woman man man woman man man man woman man woman woman man man))
6
+ @ct = Statsample::Crosstab.new(@v1, @v2)
7
+ super
8
+ end
9
+
10
+ def test_crosstab_errors
11
+ e1 = %w(black blonde black black red black brown black blonde black)
12
+ assert_raise ArgumentError do
13
+ Statsample::Crosstab.new(e1, @v2)
14
+ end
15
+ e2 = Daru::Vector.new(%w(black blonde black black red black brown black blonde black black))
16
+
17
+ assert_raise ArgumentError do
18
+ Statsample::Crosstab.new(e2, @v2)
19
+ end
20
+ assert_nothing_raised do
21
+ Statsample::Crosstab.new(@v1, @v2)
22
+ end
23
+ end
24
+
25
+ def test_crosstab_basic
26
+ assert_equal(Daru::Vector.new(%w(black blonde brown red)), @ct.rows_names)
27
+ assert_equal(Daru::Vector.new(%w(man woman)), @ct.cols_names)
28
+ assert_equal({ 'black' => 7, 'blonde' => 3, 'red' => 2, 'brown' => 1 }, @ct.rows_total)
29
+ assert_equal({ 'man' => 8, 'woman' => 5 }, @ct.cols_total)
30
+ end
31
+
32
+ def test_crosstab_frequencies
33
+ fq = @ct.frequencies
34
+ assert_equal(8, fq.size)
35
+ sum = fq.inject(0) { |s, x| s + x[1] }
36
+ assert_equal(13, sum)
37
+ fr = @ct.frequencies_by_row
38
+ assert_equal(4, fr.size)
39
+ assert_equal(%w(black blonde brown red), fr.keys.sort)
40
+ fc = @ct.frequencies_by_col
41
+ assert_equal(2, fc.size)
42
+ assert_equal(%w(man woman), fc.keys.sort)
43
+ assert_equal(Matrix.rows([[3, 4], [3, 0], [1, 0], [1, 1]]), @ct.to_matrix)
44
+ end
45
+
46
+ def test_summary
47
+ @ct.percentage_row = true
48
+ @ct.percentage_column = true
49
+ @ct.percentage_total = true
50
+ assert(@ct.summary.size > 0)
51
+ end
52
+
53
+ def test_expected
54
+ v1 = Daru::Vector.new(%w(1 1 1 1 1 0 0 0 0 0))
55
+ v2 = Daru::Vector.new(%w(0 0 0 0 0 1 1 1 1 1))
56
+ ct = Statsample::Crosstab.new(v1, v2)
57
+ assert_equal(Matrix[[2.5, 2.5], [2.5, 2.5]], ct.matrix_expected)
58
+ end
59
+
60
+ def test_crosstab_with_scale
61
+ v1 = Daru::Vector.new(%w(1 1 1 1 1 0 0 0 0 0))
62
+ v2 = Daru::Vector.new(%w(0 0 0 0 0 1 1 1 1 1))
63
+ ct = Statsample::Crosstab.new(v1, v2)
64
+ assert_equal(Matrix[[0, 5], [5, 0]], ct.to_matrix)
65
+ assert_nothing_raised { ct.summary }
66
+ end
67
+ end
@@ -0,0 +1,39 @@
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
+ class StatsampleDominanceAnalysisTestCase < Minitest::Test
3
+ def test_dominance_univariate
4
+ # Example from Budescu (1993)
5
+ m = Matrix[[1, 0.683, 0.154, 0.460, 0.618], [0.683, 1, -0.050, 0.297, 0.461], [0.154, -0.050, 1, 0.006, 0.262], [0.460, 0.297, 0.006, 1, 0.507], [0.618, 0.461, 0.262, 0.507, 1]]
6
+ m.extend Statsample::CovariateMatrix
7
+ m.fields = %w(x1 x2 x3 x4 y)
8
+ da = Statsample::DominanceAnalysis.new(m, 'y')
9
+
10
+ contr_x1 = { 'x2' => 0.003, 'x3' => 0.028, 'x4' => 0.063 }
11
+ contr_x1.each do |k, v|
12
+ assert_in_delta(v, da.models_data[['x1']].contributions[k], 0.001)
13
+ end
14
+ assert_in_delta(0.052, da.models_data[%w(x2 x3 x4)].contributions['x1'], 0.001)
15
+ expected_dominances = [1, 1, 0.5, 0.5, 0, 0]
16
+ expected_g_dominances = [1, 1, 1, 1, 0, 0]
17
+
18
+ da.pairs.each_with_index do |a, i|
19
+ assert_equal(expected_dominances[i], da.total_dominance_pairwise(a[0], a[1]))
20
+ assert_equal(expected_dominances[i], da.conditional_dominance_pairwise(a[0], a[1]))
21
+ assert_equal(expected_g_dominances[i], da.general_dominance_pairwise(a[0], a[1]))
22
+ end
23
+ assert(da.summary.size > 0)
24
+ end
25
+
26
+ def test_dominance_multivariate
27
+ m = Matrix[[1.0, -0.19, -0.358, -0.343, 0.359, 0.257], [-0.19, 1.0, 0.26, 0.29, -0.11, -0.11], [-0.358, 0.26, 1.0, 0.54, -0.49, -0.23], [-0.343, 0.29, 0.54, 1.0, -0.22, -0.41], [0.359, -0.11, -0.49, -0.22, 1.0, 0.62], [0.257, -0.11, -0.23, -0.41, 0.62, 1]]
28
+ m.extend Statsample::CovariateMatrix
29
+ m.fields = %w(y1 y2 x1 x2 x3 x4)
30
+ m2 = m.submatrix(%w(y1 x1 x2 x3 x4))
31
+
32
+ da = Statsample::DominanceAnalysis.new(m, %w(y1 y2), cases: 683, method_association: :p2yx)
33
+
34
+ contr_x1 = { 'x2' => 0.027, 'x3' => 0.024, 'x4' => 0.017 }
35
+ contr_x1.each do |k, v|
36
+ assert_in_delta(v, da.models_data[['x1']].contributions[k], 0.003)
37
+ end
38
+ end
39
+ end
@@ -0,0 +1,228 @@
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
+ # require 'rserve'
3
+ # require 'statsample/rserve_extension'
4
+
5
+ class StatsampleFactorTestCase < Minitest::Test
6
+ include Statsample::Fixtures
7
+ # Based on Hardle and Simar
8
+ def setup
9
+ @fixtures_dir = File.expand_path(File.dirname(__FILE__) + '/fixtures')
10
+ end
11
+
12
+ # Based on the Hardle and Simar example (bank2.dat)
13
+ def test_covariance_matrix
14
+ ds = Daru::DataFrame.from_plaintext(@fixtures_dir + '/bank2.dat', [:v1,:v2,:v3,:v4,:v5,:v6])
15
+ ds.vectors.each {|f|
16
+ ds[f] = ds[f].center
17
+ }
18
+ cm = Statsample::Bivariate.covariance_matrix ds
19
+ pca = Statsample::Factor::PCA.new(cm, m: 6)
20
+ # puts pca.summary
21
+ # puts pca.feature_matrix
22
+ exp_eig = Daru::Vector.new([2.985, 0.931, 0.242, 0.194, 0.085, 0.035])
23
+ assert_similar_vector(exp_eig, Daru::Vector.new(pca.eigenvalues), 0.1)
24
+ pcs = pca.principal_components(ds)
25
+ k = 6
26
+ comp_matrix = pca.component_matrix
27
+ k.times {|i|
28
+ pc_id = "PC_#{i + 1}".to_sym
29
+ k.times {|j| # variable
30
+ ds_id = "v#{j + 1}".to_sym
31
+ r = Statsample::Bivariate.correlation(ds[ds_id], pcs[pc_id])
32
+ assert_in_delta(r, comp_matrix[j, i])
33
+ }
34
+ }
35
+ end
36
+
37
+ def test_principalcomponents_ruby_gsl
38
+ if Statsample.has_gsl?
39
+ ran = Distribution::Normal.rng
40
+
41
+ # @r=::Rserve::Connection.new
42
+
43
+ samples = 20
44
+ [3, 5, 7].each {|k|
45
+ v = {}
46
+ v[:x0] = Daru::Vector.new(samples.times.map { ran.call }).center
47
+ (1...k).each { |i|
48
+ v["x#{i}".to_sym] = Daru::Vector.new(samples.times.map { |ii| ran.call * 0.5 + v["x#{i - 1}".to_sym][ii] * 0.5 }).center
49
+ }
50
+
51
+ ds = Daru::DataFrame.new(v)
52
+ cm = Statsample::Bivariate.covariance_matrix ds
53
+ # @r.assign('ds',ds)
54
+ # @r.eval('cm<-cor(ds);sm<-eigen(cm, sym=TRUE);v<-sm$vectors')
55
+ # puts "eigenvalues"
56
+ # puts @r.eval('v').to_ruby.to_s
57
+ pca_ruby = Statsample::Factor::PCA.new(cm, m: k, use_gsl: false)
58
+ pca_gsl = Statsample::Factor::PCA.new(cm, m: k, use_gsl: true)
59
+ pc_ruby = pca_ruby.principal_components(ds)
60
+ pc_gsl = pca_gsl.principal_components(ds)
61
+ # Test component matrix correlation!
62
+ cm_ruby = pca_ruby.component_matrix
63
+ # puts cm_ruby.summary
64
+ k.times {|i|
65
+ pc_id = "PC_#{i + 1}".to_sym
66
+ assert_in_delta(pca_ruby.eigenvalues[i], pca_gsl.eigenvalues[i], 1e-10)
67
+ # Revert gsl component values
68
+ pc_gsl_data = (pc_gsl[pc_id][0] - pc_ruby[pc_id][0]).abs > 1e-6 ? pc_gsl[pc_id].recode(&:-@) : pc_gsl[pc_id]
69
+ assert_similar_vector(pc_gsl_data, pc_ruby[pc_id], 1e-6, "PC for #{k} variables")
70
+ if false
71
+ k.times {|j| # variable
72
+ ds_id = "x#{j}".to_sym
73
+ r = Statsample::Bivariate.correlation(ds[ds_id], pc_ruby[pc_id])
74
+ puts "#{pc_id}-#{ds_id}:#{r}"
75
+ }
76
+ end
77
+ }
78
+ }
79
+ end
80
+ # @r.close
81
+ end
82
+
83
+ def test_principalcomponents
84
+ if Statsample.has_gsl?
85
+ principalcomponents(true)
86
+ else
87
+ skip "Require GSL"
88
+ end
89
+ principalcomponents(false)
90
+ end
91
+
92
+ def principalcomponents(gsl)
93
+ ran = Distribution::Normal.rng
94
+ samples = 50
95
+ x1 = Daru::Vector.new(samples.times.map { ran.call })
96
+ x2 = Daru::Vector.new(samples.times.map { |i| ran.call * 0.5 + x1[i] * 0.5 })
97
+ ds = Daru::DataFrame.new({ :x1 => x1, :x2 => x2 })
98
+
99
+ cm = Statsample::Bivariate.correlation_matrix ds
100
+ r = cm[0, 1]
101
+ pca = Statsample::Factor::PCA.new(cm, m: 2, use_gsl: gsl)
102
+ assert_in_delta(1 + r, pca.eigenvalues[0], 1e-10)
103
+ assert_in_delta(1 - r, pca.eigenvalues[1], 1e-10)
104
+ hs = 1.0 / Math.sqrt(2)
105
+ assert_equal_vector(Vector[1, 1] * hs, pca.eigenvectors[0])
106
+ m_1 = gsl ? Vector[-1, 1] : Vector[1, -1]
107
+
108
+ assert_equal_vector(hs * m_1, pca.eigenvectors[1])
109
+
110
+ pcs = pca.principal_components(ds)
111
+ exp_pc_1 = ds.collect_row_with_index {|row, _i|
112
+ hs * (row[:x1] + row[:x2])
113
+ }
114
+ exp_pc_2 = ds.collect_row_with_index {|row, _i|
115
+ gsl ? hs * (row[:x2] - row[:x1]) : hs * (row[:x1] - row[:x2])
116
+ }
117
+ assert_similar_vector(exp_pc_1, pcs[:PC_1])
118
+ assert_similar_vector(exp_pc_2, pcs[:PC_2])
119
+ end
120
+
121
+ def test_antiimage
122
+ cor = Matrix[[1, 0.964, 0.312], [0.964, 1, 0.411], [0.312, 0.411, 1]]
123
+ expected = Matrix[[0.062, -0.057, 0.074], [-0.057, 0.057, -0.089], [0.074, -0.089, 0.729]]
124
+ ai = Statsample::Factor.anti_image_covariance_matrix(cor)
125
+ assert(Matrix.equal_in_delta?(expected, ai, 0.01), "#{expected} not equal to #{ai}")
126
+ end
127
+
128
+ def test_kmo
129
+ @v1 = Daru::Vector.new([1, 2, 3, 4, 7, 8, 9, 10, 14, 15, 20, 50, 60, 70])
130
+ @v2 = Daru::Vector.new([5, 6, 11, 12, 13, 16, 17, 18, 19, 20, 30, 0, 0, 0])
131
+ @v3 = Daru::Vector.new([10, 3, 20, 30, 40, 50, 80, 10, 20, 30, 40, 2, 3, 4])
132
+ # NOTE(review): this comment used to read "KMO: 0.490", but the assertion below expects 0.667 - confirm which is right
133
+ ds = Daru::DataFrame.new({ :v1 => @v1, :v2 => @v2, :v3 => @v3 })
134
+ cor = Statsample::Bivariate.correlation_matrix(ds)
135
+ kmo = Statsample::Factor.kmo(cor)
136
+ assert_in_delta(0.667, kmo, 0.001)
137
+ assert_in_delta(0.81, Statsample::Factor.kmo(harman_817), 0.01)
138
+ end
139
+
140
+ def test_kmo_univariate
141
+ m = harman_817
142
+ expected = [0.73, 0.76, 0.84, 0.87, 0.53, 0.93, 0.78, 0.86]
143
+ m.row_size.times.map {|i|
144
+ assert_in_delta(expected[i], Statsample::Factor.kmo_univariate(m, i), 0.01)
145
+ }
146
+ end
147
+ # Tested with SPSS and R
148
+ def test_pca
149
+ dtype = Statsample.has_gsl? ? :gsl : :array
150
+ a = Daru::Vector.new([2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1], dtype: dtype)
151
+ b = Daru::Vector.new([2.4, 0.7, 2.9, 2.2, 3.0, 2.7, 1.6, 1.1, 1.6, 0.9], dtype: dtype)
152
+ a = a - a.mean
153
+ b = b - b.mean
154
+ ds = Daru::DataFrame.new({ :a => a, :b => b })
155
+
156
+ cov_matrix = Statsample::Bivariate.covariance_matrix(ds)
157
+ if Statsample.has_gsl?
158
+ pca = Statsample::Factor::PCA.new(cov_matrix, use_gsl: true)
159
+ pca_set(pca, 'gsl')
160
+ else
161
+ skip('Eigenvalues could be calculated with GSL (requires gsl)')
162
+ end
163
+ pca = Statsample::Factor::PCA.new(cov_matrix, use_gsl: false)
164
+ pca_set(pca, 'ruby')
165
+ end
166
+
167
+ def pca_set(pca, _type)
168
+ expected_eigenvalues = [1.284, 0.0490]
169
+ expected_eigenvalues.each_with_index{|ev, i|
170
+ assert_in_delta(ev, pca.eigenvalues[i], 0.001)
171
+ }
172
+ expected_communality = [0.590, 0.694]
173
+ expected_communality.each_with_index{|ev, i|
174
+ assert_in_delta(ev, pca.communalities[i], 0.001)
175
+ }
176
+ expected_cm = [0.768, 0.833]
177
+ obs = pca.component_matrix_correlation(1).column(0).to_a
178
+ expected_cm.each_with_index{|ev, i|
179
+ assert_in_delta(ev, obs[i], 0.001)
180
+ }
181
+
182
+ assert(pca.summary)
183
+ end
184
+
185
+ # Tested with R
186
+ def test_principalaxis
187
+ matrix = ::Matrix[
188
+ [1.0, 0.709501601093587, 0.877596585880047, 0.272219316266807], [0.709501601093587, 1.0, 0.291633797330304, 0.871141831433844], [0.877596585880047, 0.291633797330304, 1.0, -0.213373722977167], [0.272219316266807, 0.871141831433844, -0.213373722977167, 1.0]]
189
+
190
+ fa = Statsample::Factor::PrincipalAxis.new(matrix, m: 1, max_iterations: 50)
191
+
192
+ cm = ::Matrix[[0.923], [0.912], [0.507], [0.483]]
193
+
194
+ assert_equal_matrix(cm, fa.component_matrix, 0.001)
195
+
196
+ h2 = [0.852, 0.832, 0.257, 0.233]
197
+ h2.each_with_index{|ev, i|
198
+ assert_in_delta(ev, fa.communalities[i], 0.001)
199
+ }
200
+ eigen1 = 2.175
201
+ assert_in_delta(eigen1, fa.eigenvalues[0], 0.001)
202
+ assert(fa.summary.size > 0)
203
+ fa = Statsample::Factor::PrincipalAxis.new(matrix, smc: false)
204
+
205
+ assert_raise RuntimeError do
206
+ fa.iterate
207
+ end
208
+ end
209
+
210
+ def test_rotation_varimax
211
+ a = Matrix[[0.4320, 0.8129, 0.3872],
212
+ [0.7950, -0.5416, 0.2565],
213
+ [0.5944, 0.7234, -0.3441],
214
+ [0.8945, -0.3921, -0.1863]]
215
+
216
+ expected = Matrix[[-0.0204423, 0.938674, -0.340334],
217
+ [0.983662, 0.0730206, 0.134997],
218
+ [0.0826106, 0.435975, -0.893379],
219
+ [0.939901, -0.0965213, -0.309596]]
220
+ varimax = Statsample::Factor::Varimax.new(a)
221
+ assert(!varimax.rotated.nil?, "Rotated shouldn't be empty")
222
+ assert(!varimax.component_transformation_matrix.nil?, "Component matrix shouldn't be empty")
223
+ assert(!varimax.h2.nil?, "H2 shouldn't be empty")
224
+
225
+ assert_equal_matrix(expected, varimax.rotated, 1e-6)
226
+ assert(varimax.summary.size > 0)
227
+ end
228
+ end