statsample-ekatena 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.travis.yml +23 -0
  4. data/CONTRIBUTING.md +17 -0
  5. data/Gemfile +2 -0
  6. data/History.txt +457 -0
  7. data/LICENSE.txt +12 -0
  8. data/README.md +175 -0
  9. data/Rakefile +44 -0
  10. data/benchmarks/correlation_matrix_15_variables.rb +32 -0
  11. data/benchmarks/correlation_matrix_5_variables.rb +33 -0
  12. data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
  13. data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
  14. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +71 -0
  15. data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
  16. data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
  17. data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
  18. data/benchmarks/correlation_matrix_methods/results.ds +0 -0
  19. data/benchmarks/factor_map.rb +37 -0
  20. data/benchmarks/helpers_benchmark.rb +5 -0
  21. data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
  22. data/doc_latex/manual/equations.tex +78 -0
  23. data/examples/boxplot.rb +28 -0
  24. data/examples/chisquare_test.rb +23 -0
  25. data/examples/correlation_matrix.rb +32 -0
  26. data/examples/dataset.rb +30 -0
  27. data/examples/dominance_analysis.rb +33 -0
  28. data/examples/dominance_analysis_bootstrap.rb +32 -0
  29. data/examples/histogram.rb +26 -0
  30. data/examples/icc.rb +24 -0
  31. data/examples/levene.rb +29 -0
  32. data/examples/multiple_regression.rb +20 -0
  33. data/examples/multivariate_correlation.rb +33 -0
  34. data/examples/parallel_analysis.rb +40 -0
  35. data/examples/polychoric.rb +40 -0
  36. data/examples/principal_axis.rb +26 -0
  37. data/examples/reliability.rb +31 -0
  38. data/examples/scatterplot.rb +25 -0
  39. data/examples/t_test.rb +27 -0
  40. data/examples/tetrachoric.rb +17 -0
  41. data/examples/u_test.rb +24 -0
  42. data/examples/vector.rb +20 -0
  43. data/examples/velicer_map_test.rb +46 -0
  44. data/grab_references.rb +29 -0
  45. data/lib/spss.rb +134 -0
  46. data/lib/statsample-ekatena/analysis.rb +100 -0
  47. data/lib/statsample-ekatena/analysis/suite.rb +89 -0
  48. data/lib/statsample-ekatena/analysis/suitereportbuilder.rb +44 -0
  49. data/lib/statsample-ekatena/anova.rb +24 -0
  50. data/lib/statsample-ekatena/anova/contrast.rb +79 -0
  51. data/lib/statsample-ekatena/anova/oneway.rb +187 -0
  52. data/lib/statsample-ekatena/anova/twoway.rb +207 -0
  53. data/lib/statsample-ekatena/bivariate.rb +406 -0
  54. data/lib/statsample-ekatena/bivariate/pearson.rb +54 -0
  55. data/lib/statsample-ekatena/codification.rb +182 -0
  56. data/lib/statsample-ekatena/converter/csv.rb +28 -0
  57. data/lib/statsample-ekatena/converter/spss.rb +48 -0
  58. data/lib/statsample-ekatena/converters.rb +211 -0
  59. data/lib/statsample-ekatena/crosstab.rb +188 -0
  60. data/lib/statsample-ekatena/daru.rb +115 -0
  61. data/lib/statsample-ekatena/dataset.rb +10 -0
  62. data/lib/statsample-ekatena/dominanceanalysis.rb +425 -0
  63. data/lib/statsample-ekatena/dominanceanalysis/bootstrap.rb +232 -0
  64. data/lib/statsample-ekatena/factor.rb +104 -0
  65. data/lib/statsample-ekatena/factor/map.rb +124 -0
  66. data/lib/statsample-ekatena/factor/parallelanalysis.rb +166 -0
  67. data/lib/statsample-ekatena/factor/pca.rb +242 -0
  68. data/lib/statsample-ekatena/factor/principalaxis.rb +243 -0
  69. data/lib/statsample-ekatena/factor/rotation.rb +198 -0
  70. data/lib/statsample-ekatena/formula/fit_model.rb +46 -0
  71. data/lib/statsample-ekatena/formula/formula.rb +306 -0
  72. data/lib/statsample-ekatena/graph.rb +11 -0
  73. data/lib/statsample-ekatena/graph/boxplot.rb +236 -0
  74. data/lib/statsample-ekatena/graph/histogram.rb +198 -0
  75. data/lib/statsample-ekatena/graph/scatterplot.rb +213 -0
  76. data/lib/statsample-ekatena/histogram.rb +180 -0
  77. data/lib/statsample-ekatena/matrix.rb +329 -0
  78. data/lib/statsample-ekatena/multiset.rb +310 -0
  79. data/lib/statsample-ekatena/regression.rb +65 -0
  80. data/lib/statsample-ekatena/regression/multiple.rb +89 -0
  81. data/lib/statsample-ekatena/regression/multiple/alglibengine.rb +128 -0
  82. data/lib/statsample-ekatena/regression/multiple/baseengine.rb +251 -0
  83. data/lib/statsample-ekatena/regression/multiple/gslengine.rb +129 -0
  84. data/lib/statsample-ekatena/regression/multiple/matrixengine.rb +205 -0
  85. data/lib/statsample-ekatena/regression/multiple/rubyengine.rb +86 -0
  86. data/lib/statsample-ekatena/regression/simple.rb +121 -0
  87. data/lib/statsample-ekatena/reliability.rb +150 -0
  88. data/lib/statsample-ekatena/reliability/icc.rb +415 -0
  89. data/lib/statsample-ekatena/reliability/multiscaleanalysis.rb +181 -0
  90. data/lib/statsample-ekatena/reliability/scaleanalysis.rb +233 -0
  91. data/lib/statsample-ekatena/reliability/skillscaleanalysis.rb +114 -0
  92. data/lib/statsample-ekatena/resample.rb +15 -0
  93. data/lib/statsample-ekatena/shorthand.rb +125 -0
  94. data/lib/statsample-ekatena/srs.rb +169 -0
  95. data/lib/statsample-ekatena/test.rb +82 -0
  96. data/lib/statsample-ekatena/test/bartlettsphericity.rb +45 -0
  97. data/lib/statsample-ekatena/test/chisquare.rb +73 -0
  98. data/lib/statsample-ekatena/test/f.rb +52 -0
  99. data/lib/statsample-ekatena/test/kolmogorovsmirnov.rb +63 -0
  100. data/lib/statsample-ekatena/test/levene.rb +88 -0
  101. data/lib/statsample-ekatena/test/t.rb +309 -0
  102. data/lib/statsample-ekatena/test/umannwhitney.rb +208 -0
  103. data/lib/statsample-ekatena/test/wilcoxonsignedrank.rb +90 -0
  104. data/lib/statsample-ekatena/vector.rb +19 -0
  105. data/lib/statsample-ekatena/version.rb +3 -0
  106. data/lib/statsample.rb +282 -0
  107. data/po/es/statsample.mo +0 -0
  108. data/po/es/statsample.po +959 -0
  109. data/po/statsample.pot +947 -0
  110. data/references.txt +24 -0
  111. data/statsample-ekatena.gemspec +49 -0
  112. data/test/fixtures/bank2.dat +200 -0
  113. data/test/fixtures/correlation_matrix.rb +17 -0
  114. data/test/fixtures/df.csv +15 -0
  115. data/test/fixtures/hartman_23.matrix +9 -0
  116. data/test/fixtures/stock_data.csv +500 -0
  117. data/test/fixtures/tetmat_matrix.txt +5 -0
  118. data/test/fixtures/tetmat_test.txt +1001 -0
  119. data/test/helpers_tests.rb +83 -0
  120. data/test/test_analysis.rb +176 -0
  121. data/test/test_anova_contrast.rb +36 -0
  122. data/test/test_anovaoneway.rb +26 -0
  123. data/test/test_anovatwoway.rb +37 -0
  124. data/test/test_anovatwowaywithdataset.rb +47 -0
  125. data/test/test_anovawithvectors.rb +102 -0
  126. data/test/test_awesome_print_bug.rb +16 -0
  127. data/test/test_bartlettsphericity.rb +25 -0
  128. data/test/test_bivariate.rb +164 -0
  129. data/test/test_codification.rb +78 -0
  130. data/test/test_crosstab.rb +67 -0
  131. data/test/test_dominance_analysis.rb +39 -0
  132. data/test/test_factor.rb +228 -0
  133. data/test/test_factor_map.rb +38 -0
  134. data/test/test_factor_pa.rb +56 -0
  135. data/test/test_fit_model.rb +88 -0
  136. data/test/test_ggobi.rb +35 -0
  137. data/test/test_gsl.rb +15 -0
  138. data/test/test_histogram.rb +109 -0
  139. data/test/test_matrix.rb +48 -0
  140. data/test/test_multiset.rb +176 -0
  141. data/test/test_regression.rb +231 -0
  142. data/test/test_reliability.rb +223 -0
  143. data/test/test_reliability_icc.rb +198 -0
  144. data/test/test_reliability_skillscale.rb +57 -0
  145. data/test/test_resample.rb +24 -0
  146. data/test/test_srs.rb +9 -0
  147. data/test/test_statistics.rb +69 -0
  148. data/test/test_stest.rb +69 -0
  149. data/test/test_stratified.rb +17 -0
  150. data/test/test_test_f.rb +33 -0
  151. data/test/test_test_kolmogorovsmirnov.rb +34 -0
  152. data/test/test_test_t.rb +62 -0
  153. data/test/test_umannwhitney.rb +27 -0
  154. data/test/test_vector.rb +12 -0
  155. data/test/test_wilcoxonsignedrank.rb +64 -0
  156. metadata +570 -0
@@ -0,0 +1,232 @@
1
+ module Statsample
2
+ class DominanceAnalysis
3
+ # == Goal
4
+ # Generates bootstrap samples to identify the replicability of a Dominance Analysis. See Azen & Budescu (2003) for more information.
5
+ #
6
+ # == Usage
7
+ #
8
+ # require 'statsample'
9
+ # a = Daru::Vector.new(100.times.collect {rand})
10
+ # b = Daru::Vector.new(100.times.collect {rand})
11
+ # c = Daru::Vector.new(100.times.collect {rand})
12
+ # d = Daru::Vector.new(100.times.collect {rand})
13
+ # ds = Daru::DataFrame.new({:a => a,:b => b,:c => c,:d => d})
14
+ # ds[:y] = ds.collect_rows { |row| row[:a]*5+row[:b]*2+row[:c]*2+row[:d]*2+10*rand() }
15
+ # dab=Statsample::DominanceAnalysis::Bootstrap.new(ds, :y, :debug=>true)
16
+ # dab.bootstrap(100,nil)
17
+ # puts dab.summary
18
+ # <strong>Output</strong>
19
+ # Sample size: 100
20
+ # t: 1.98421693632958
21
+ #
22
+ # Linear Regression Engine: Statsample::Regression::Multiple::MatrixEngine
23
+ # Table: Bootstrap report
24
+ # --------------------------------------------------------------------------------------------
25
+ # | pairs | sD | Dij | SE(Dij) | Pij | Pji | Pno | Reproducibility |
26
+ # --------------------------------------------------------------------------------------------
27
+ # | Complete dominance |
28
+ # --------------------------------------------------------------------------------------------
29
+ # | a - b | 1.0 | 0.6150 | 0.454 | 0.550 | 0.320 | 0.130 | 0.550 |
30
+ # | a - c | 1.0 | 0.9550 | 0.175 | 0.930 | 0.020 | 0.050 | 0.930 |
31
+ # | a - d | 1.0 | 0.9750 | 0.131 | 0.960 | 0.010 | 0.030 | 0.960 |
32
+ # | b - c | 1.0 | 0.8800 | 0.276 | 0.820 | 0.060 | 0.120 | 0.820 |
33
+ # | b - d | 1.0 | 0.9250 | 0.193 | 0.860 | 0.010 | 0.130 | 0.860 |
34
+ # | c - d | 0.5 | 0.5950 | 0.346 | 0.350 | 0.160 | 0.490 | 0.490 |
35
+ # --------------------------------------------------------------------------------------------
36
+ # | Conditional dominance |
37
+ # --------------------------------------------------------------------------------------------
38
+ # | a - b | 1.0 | 0.6300 | 0.458 | 0.580 | 0.320 | 0.100 | 0.580 |
39
+ # | a - c | 1.0 | 0.9700 | 0.156 | 0.960 | 0.020 | 0.020 | 0.960 |
40
+ # | a - d | 1.0 | 0.9800 | 0.121 | 0.970 | 0.010 | 0.020 | 0.970 |
41
+ # | b - c | 1.0 | 0.8850 | 0.283 | 0.840 | 0.070 | 0.090 | 0.840 |
42
+ # | b - d | 1.0 | 0.9500 | 0.181 | 0.920 | 0.020 | 0.060 | 0.920 |
43
+ # | c - d | 0.5 | 0.5800 | 0.360 | 0.350 | 0.190 | 0.460 | 0.460 |
44
+ # --------------------------------------------------------------------------------------------
45
+ # | General Dominance |
46
+ # --------------------------------------------------------------------------------------------
47
+ # | a - b | 1.0 | 0.6500 | 0.479 | 0.650 | 0.350 | 0.000 | 0.650 |
48
+ # | a - c | 1.0 | 0.9800 | 0.141 | 0.980 | 0.020 | 0.000 | 0.980 |
49
+ # | a - d | 1.0 | 0.9900 | 0.100 | 0.990 | 0.010 | 0.000 | 0.990 |
50
+ # | b - c | 1.0 | 0.9000 | 0.302 | 0.900 | 0.100 | 0.000 | 0.900 |
51
+ # | b - d | 1.0 | 0.9700 | 0.171 | 0.970 | 0.030 | 0.000 | 0.970 |
52
+ # | c - d | 1.0 | 0.5600 | 0.499 | 0.560 | 0.440 | 0.000 | 0.560 |
53
+ # --------------------------------------------------------------------------------------------
54
+ #
55
+ # Table: General averages
56
+ # ---------------------------------------
57
+ # | var | mean | se | p.5 | p.95 |
58
+ # ---------------------------------------
59
+ # | a | 0.133 | 0.049 | 0.062 | 0.218 |
60
+ # | b | 0.106 | 0.048 | 0.029 | 0.199 |
61
+ # | c | 0.035 | 0.032 | 0.002 | 0.106 |
62
+ # | d | 0.023 | 0.019 | 0.002 | 0.062 |
63
+ # ---------------------------------------
64
+ #
65
+ # == References:
66
+ # * Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. <em>Psychological Methods, 8</em>(2), 129-148.
67
+ class Bootstrap
68
+ include Writable
69
+ include Summarizable
70
+ # Total Dominance results
71
+ attr_reader :samples_td
72
+ # Conditional Dominance results
73
+ attr_reader :samples_cd
74
+ # General Dominance results
75
+ attr_reader :samples_gd
76
+ # General average results
77
+ attr_reader :samples_ga
78
+ # Name of fields
79
+ attr_reader :fields
80
+ # Regression class used for analysis
81
+ attr_accessor :regression_class
82
+ # Dataset
83
+ attr_accessor :ds
84
+ # Name of analysis
85
+ attr_accessor :name
86
+ # Alpha level of confidence. Default: ALPHA
87
+ attr_accessor :alpha
88
+ # Debug?
89
+ attr_accessor :debug
90
+ # Default level of confidence for t calculation
91
+ ALPHA=0.95
92
# Create a new Dominance Analysis Bootstrap object.
#
# * ds: A Daru::DataFrame object
# * y_var: Name of the dependent variable. When an Array is given, all of
#   its entries are excluded from the predictors and the
#   MultipleDependent regression class is selected.
# * opts: Hash of attribute => value. Each key with a public writer
#   (<tt>key=</tt>) is assigned; keys without a writer are ignored.
def initialize(ds, y_var, opts = Hash.new)
  @ds = ds
  @y_var = y_var.respond_to?(:to_sym) ? y_var.to_sym : y_var
  @n = ds.nrows

  @n_samples = 0
  @alpha = ALPHA
  @debug = false
  if y_var.is_a? Array
    @fields = ds.vectors.to_a - y_var
    @regression_class = Regression::Multiple::MultipleDependent
  else
    # Remove the dependent variable under both the caller's spelling and
    # the normalized one, so "y" and :y are treated alike.
    @fields = ds.vectors.to_a - [y_var, @y_var]
    @regression_class = Regression::Multiple::MatrixEngine
  end
  @samples_ga = @fields.each_with_object({}) { |field, store| store[field] = [] }

  @name = _("Bootstrap dominance Analysis: %s over %s") % [ds.vectors.to_a.join(","), @y_var]
  opts.each do |key, value|
    # Check for the writer itself: read-only attributes (e.g. :fields)
    # respond to the reader but have no "key=" method.
    writer = "#{key}="
    send(writer, value) if respond_to?(writer)
  end
  create_samples_pairs
end
121
+ # lr_class deprecated
122
+ alias_method :lr_class, :regression_class
123
# Dominance analysis of the original dataset, built on first use and
# reused afterwards.
def da
  @da ||= DominanceAnalysis.new(@ds, @y_var, :regression_class => @regression_class)
end
129
# Draws re-samples from the original dataset and appends the dominance
# results of each one to @samples_td, @samples_cd, @samples_gd and
# @samples_ga.
#
# * number_samples: Number of new samples to add
# * n: size of each new sample, passed through to the dataset's
#   +bootstrap+ method (nil by default)
def bootstrap(number_samples, n = nil)
  number_samples.times do |iteration|
    @n_samples += 1
    puts _("Bootstrap %d of %d") % [iteration + 1, number_samples] if @debug
    resampled = @ds.bootstrap(n)
    analysis = DominanceAnalysis.new(resampled, @y_var, :regression_class => @regression_class)

    # Each measure of the re-sample is appended to its matching store.
    {
      :total_dominance => @samples_td,
      :conditional_dominance => @samples_cd,
      :general_dominance => @samples_gd,
      :general_averages => @samples_ga
    }.each do |measure, store|
      analysis.public_send(measure).each { |key, value| store[key].push(value) }
    end
  end
end
155
# Resets the pairwise sample stores.
#
# Builds @pairs with every 2-combination of @fields (in field order) and
# gives @samples_td, @samples_cd and @samples_gd an empty Array per pair.
# Produces no output: the stray debugging +p+ call was removed.
def create_samples_pairs
  @samples_td = {}
  @samples_cd = {}
  @samples_gd = {}
  @pairs = @fields.combination(2).to_a
  @pairs.each do |pair|
    # A separate Array per store so the three measures never share state.
    [@samples_td, @samples_cd, @samples_gd].each { |samples| samples[pair] = [] }
  end
end
170
# Value returned by Distribution::T.p_value for the two-sided level
# derived from @alpha, with @n_samples - 1 degrees of freedom.
def t
  level = 1 - ((1 - @alpha) / 2)
  degrees_of_freedom = @n_samples - 1
  Distribution::T.p_value(level, degrees_of_freedom)
end
173
# Writes the bootstrap report into +builder+: a header, one table with the
# complete / conditional / general dominance rows for every pair, and one
# table with the general averages per field.
# Raises if no bootstrap sample has been generated yet.
def report_building(builder) # :nodoc:
  raise "You should bootstrap first" if @n_samples==0
  builder.section(:name=>@name) do |generator|
    generator.text _("Sample size: %d\n") % @n_samples
    generator.text "t: #{t}\n"
    generator.text _("Linear Regression Engine: %s") % @regression_class.name

    pairs_table = ReportBuilder::Table.new(:name=>"Bootstrap report", :header => [_("pairs"), "sD","Dij", _("SE(Dij)"), "Pij", "Pji", "Pno", _("Reproducibility")])
    # Title row, rule, then one summary row per pair for a single measure.
    add_section = lambda do |title, samples, pairwise|
      pairs_table.row([title, "", "", "", "", "", "", ""])
      pairs_table.hr
      @pairs.each do |pair|
        sampled = Daru::Vector.new(samples[pair])
        observed = da.public_send(pairwise, pair[0], pair[1])
        pairs_table.row(summary_pairs(pair, sampled, observed))
      end
    end

    add_section.call(_("Complete dominance"), @samples_td, :total_dominance_pairwise)
    pairs_table.hr
    add_section.call(_("Conditional dominance"), @samples_cd, :conditional_dominance_pairwise)
    pairs_table.hr
    add_section.call(_("General Dominance"), @samples_gd, :general_dominance_pairwise)
    generator.parse_element(pairs_table)

    averages_table = ReportBuilder::Table.new(:name=>_("General averages"), :header=>[_("var"), _("mean"), _("se"), _("p.5"), _("p.95")])
    @fields.each do |field|
      sampled = Daru::Vector.new(@samples_ga[field])
      averages_table.row([
        @ds[field].name,
        sprintf("%0.3f", sampled.mean),
        sprintf("%0.3f", sampled.sd),
        sprintf("%0.3f", sampled.percentil(5)),
        sprintf("%0.3f", sampled.percentil(95))
      ])
    end
    generator.parse_element(averages_table)
  end
end
218
# Builds one report row for +pair+.
#
# * pair: the two field names being compared
# * std: vector of bootstrapped dominance values for the pair
# * ttd: dominance value of the pair on the original dataset
#
# Returns an Array of eight formatted strings: label, ttd, mean, sd, the
# proportions of the values 1, 0 and 0.5, and the proportion equal to ttd.
def summary_pairs(pair, std, ttd)
  proportions = std.proportions
  # Outcomes that never occurred get an explicit proportion of 0.
  [0, 0.5, 1].each do |outcome|
    proportions[outcome] = 0 if proportions[outcome].nil?
  end
  label = "%s - %s" % [@ds[pair[0]].name, @ds[pair[1]].name]
  [
    label,
    f(ttd, 1),
    f(std.mean, 4),
    f(std.sd),
    f(proportions[1]),
    f(proportions[0]),
    f(proportions[0.5]),
    f(proportions[ttd])
  ]
end
226
# Formats +v+ as a fixed-point string with +n+ decimals (3 by default).
def f(v, n = 3)
  sprintf("%0.#{n}f", v)
end
230
+ end
231
+ end
232
+ end
@@ -0,0 +1,104 @@
1
+ require 'statsample/factor/rotation'
2
+ require 'statsample/factor/pca'
3
+ require 'statsample/factor/principalaxis'
4
+ require 'statsample/factor/parallelanalysis'
5
+ require 'statsample/factor/map'
6
+
7
+ module Statsample
8
+ # Factor Analysis toolbox.
9
+ # * Classes for Extraction of factors:
10
+ # * Statsample::Factor::PCA
11
+ # * Statsample::Factor::PrincipalAxis
12
+ # * Classes for Rotation of factors:
13
+ # * Statsample::Factor::Varimax
14
+ # * Statsample::Factor::Equimax
15
+ # * Statsample::Factor::Quartimax
16
+ # * Classes for determining the number of components
17
+ # * Statsample::Factor::MAP
18
+ # * Statsample::Factor::ParallelAnalysis
19
+ #
20
+ # About number of components, O'Connor(2000) said:
21
+ # The two procedures [PA and MAP ] complement each other nicely,
22
+ # in that the MAP tends to err (when it does err) in the direction
23
+ # of underextraction, whereas parallel analysis tends to err
24
+ # (when it does err) in the direction of overextraction.
25
+ # Optimal decisions are thus likely to be made after considering
26
+ # the results of both analytic procedures. (p.10)
27
+
28
+ module Factor
29
# Anti-image covariance matrix.
# Useful for inspection of desirability of data for factor analysis.
# According to Dziuban & Shirkey (1974, p.359):
# "If this matrix does not exhibit many zero off-diagonal elements,
# the investigator has evidence that the correlation
# matrix is not appropriate for factor analysis."
#
# Computes S2 * R^-1 * S2, where S2 is the inverse of the diagonal of
# R^-1. The result is extended with Statsample::CovariateMatrix and
# inherits +fields+ from +matrix+ when it has them.
def self.anti_image_covariance_matrix(matrix)
  # Invert once and reuse: the original inverted the same matrix twice.
  inverse = matrix.inverse
  s2 = Matrix.diagonal(*inverse.diagonal).inverse
  aicm = s2 * inverse * s2
  aicm.extend(Statsample::CovariateMatrix)
  aicm.fields = matrix.fields if matrix.respond_to? :fields
  aicm
end
43
# Anti-image correlation matrix.
#
# Converts +matrix+ with +to_matrix+ and computes S * R^-1 * S, where S is
# the inverse of the square root of the diagonal of R^-1. The result is
# extended with Statsample::CovariateMatrix and inherits +fields+ from the
# converted matrix when it has them.
def self.anti_image_correlation_matrix(matrix)
  matrix = matrix.to_matrix
  # Invert once and reuse: the original inverted the same matrix twice.
  inverse = matrix.inverse
  s = Matrix.diagonal(*inverse.diagonal).sqrt.inverse
  aicm = s * inverse * s

  aicm.extend(Statsample::CovariateMatrix)
  aicm.fields = matrix.fields if matrix.respond_to? :fields
  aicm
end
52
+
53
# Kaiser-Meyer-Olkin measure of sampling adequacy for a correlation matrix.
#
# Kaiser's (1974, cited in Dziuban & Shirkey, 1974) calibration of the
# index is as follows:
# * .90s—marvelous
# * .80s—meritorious
# * .70s—middling
# * .60s—mediocre
# * .50s—miserable
# * below .50—unacceptable
#
# Returns sum(r^2) / (sum(r^2) + sum(q^2)) over the off-diagonal cells,
# where r comes from +matrix+ and q from its anti-image correlation matrix.
def self.kmo(matrix)
  anti_image = anti_image_correlation_matrix(matrix)
  size = matrix.row_size
  squared_r = 0
  squared_q = 0
  (0...size).each do |row|
    (0...size).each do |col|
      next if row == col
      squared_r += matrix[row, col]**2
      squared_q += anti_image[row, col]**2
    end
  end
  squared_r.quo(squared_r + squared_q)
end
76
# Kaiser-Meyer-Olkin measure of sampling adequacy for one variable.
#
# * matrix: correlation matrix
# * var: the variable, either as a row index or as a field name (String
#   or Symbol). A name is looked up in <tt>matrix.fields</tt>, first by
#   exact match and then by comparing string forms, so "a" finds :a.
#
# Raises RuntimeError when a name is given and +matrix+ has no +fields+,
# or when the name is not one of them.
def self.kmo_univariate(matrix, var)
  if var.is_a?(String) || var.is_a?(Symbol)
    raise "Matrix doesn't respond to fields" unless matrix.respond_to? :fields
    fields = matrix.fields
    j = fields.index(var) || fields.index { |field| field.to_s == var.to_s }
    raise "Matrix doesn't have field #{var}" if j.nil?
  else
    j = var
  end

  q = anti_image_correlation_matrix(matrix)
  n = matrix.row_size

  sum_r, sum_q = 0, 0
  n.times do |k|
    next if j == k
    sum_r += matrix[j, k]**2
    sum_q += q[j, k]**2
  end
  sum_r.quo(sum_r + sum_q)
end
103
+ end
104
+ end
@@ -0,0 +1,124 @@
1
+ module Statsample
2
+ module Factor
3
+ # = Velicer's Minimum Average Partial
4
+ #
5
+ # "Velicer’s (1976) MAP test involves a complete princi-
6
+ # pal components analysis followed by the examination of
7
+ # a series of matrices of partial correlations. Specifically,
8
+ # on the first step, the first principal component is par-
9
+ # tialed out of the correlations between the variables of in-
10
+ # terest, and the average squared coefficient in the off-
11
+ # diagonals of the resulting partial correlation matrix is
12
+ # computed. On the second step, the first two principal
13
+ # components are partialed out of the original correlation
14
+ # matrix and the average squared partial correlation is
15
+ # again computed. These computations are conducted for k
16
+ # (the number of variables) minus one steps. The average
17
+ # squared partial correlations from these steps are then
18
+ # lined up, and the number of components is determined by
19
+ # the step number in the analyses that resulted in the lowest
20
+ # average squared partial correlation. The average squared
21
+ # coefficient in the original correlation matrix is also com-
22
+ # puted, and if this coefficient happens to be lower than
23
+ # the lowest average squared partial correlation, then no
24
+ # components should be extracted from the correlation ma-
25
+ # trix. Statistically, components are retained as long as the
26
+ # variance in the correlation matrix represents systematic
27
+ # variance. Components are no longer retained when there
28
+ # is proportionately more unsystematic variance than sys-
29
+ # tematic variance." (O'Connor, 2000, p.397).
30
+ #
31
+ # Current algorithm is loosely based on SPSS O'Connor algorithm
32
+ #
33
+ # == Reference
34
+ # * O'Connor, B. (2000). SPSS and SAS programs for determining the number of components using parallel analysis and Velicer's MAP test. Behavior Research Methods, Instruments, & Computers, 32(3), 396-402.
35
+ #
36
+
37
+
38
+
39
+ class MAP
40
+ include Summarizable
41
+ include DirtyMemoize
42
+ # Name of analysis
43
+ attr_accessor :name
44
+ attr_reader :eigenvalues
45
+ # Number of factors to retain
46
+ attr_reader :number_of_factors
47
+ # Average squared correlations
48
+ attr_reader :fm
49
+ # Smallest average squared correlation
50
+ attr_reader :minfm
51
+
52
+ attr_accessor :use_gsl
53
# Builds the analysis from a dataset, using the dataset's
# +correlation_matrix+ as input. +opts+ is passed on to +new+.
def self.with_dataset(ds, opts = Hash.new)
  correlations = ds.correlation_matrix
  new(correlations, opts)
end
56
# Stores +matrix+ and applies the options.
#
# Recognized options, with their defaults:
# * :use_gsl: true
# * :name: translated "Velicer's MAP"
#
# Only these keys are assigned through their writers; the merged hash is
# kept in @opts.
def initialize(matrix, opts = Hash.new)
  @matrix = matrix
  defaults = {
    :use_gsl => true,
    :name => _("Velicer's MAP")
  }
  @opts = defaults.merge(opts)
  defaults.each_key { |option| send("#{option}=", @opts[option]) }
end
65
# Runs the MAP procedure and stores its results in @eigenvalues, @fm,
# @minfm, @number_of_factors and @errors.
#
# fm[0] is computed from the input matrix; fm[m+1] is computed after
# partialling out the first m+1 components. The number of factors is the
# index of the smallest fm entry. Indexes whose value is Complex are
# collected in @errors and skipped in that search.
def compute
  # GSL is only in use when it was requested AND is available. The same
  # decision must drive both the matrix type and the slicing call below.
  gsl_active = use_gsl && Statsample.has_gsl?
  gsl_m = gsl_active ? @matrix.to_gsl : @matrix
  klass_m = gsl_m.class
  eigvect, @eigenvalues = gsl_m.eigenvectors_matrix, gsl_m.eigenvalues
  eigenvalues_sqrt = @eigenvalues.collect { |v| Math.sqrt(v) }
  loadings = eigvect * (klass_m.diagonal(*eigenvalues_sqrt))
  fm = Array.new(@matrix.row_size)
  ncol = @matrix.column_size

  fm[0] = (gsl_m.mssq - ncol).quo(ncol * (ncol - 1))

  all_rows = 0..(loadings.row_size - 1)
  (ncol - 1).times do |m|
    puts "MAP:Eigenvalue #{m+1}" if $DEBUG
    # Range indexing with [] is the GSL form; a Ruby Matrix needs #minor.
    # The original branched on use_gsl alone and so picked the GSL form
    # for a Ruby Matrix whenever GSL was requested but not installed.
    a = gsl_active ? loadings[all_rows, 0..m] : loadings.minor(all_rows, 0..m)
    partcov = gsl_m - (a * a.transpose)

    d = klass_m.diagonal(*(partcov.diagonal.collect { |v| Math::sqrt(1/v) }))
    pr = d * partcov * d
    fm[m + 1] = (pr.mssq - ncol).quo(ncol * (ncol - 1))
  end

  minfm = fm[0]
  nfactors = 0
  @errors = []
  fm.each_with_index do |v, s|
    if defined?(Complex) && v.is_a?(::Complex)
      @errors.push(s)
    elsif v < minfm
      minfm = v
      nfactors = s
    end
  end
  @number_of_factors = nfactors
  @fm = fm
  @minfm = minfm
end
104
# Writes the MAP report into +g+: a table of eigenvalues, a table of the
# average squared correlations per number of components, and two summary
# sentences. Entries whose index is listed in @errors are shown as "*".
def report_building(g) #:nodoc:
  g.section(:name=>@name) do |s|
    s.table(:name=>_("Eigenvalues"),:header=>[_("Value")]) do |t|
      eigenvalues.each_with_index do |e,i|
        t.row([@errors.include?(i) ? "*" : "%0.6f" % e])
      end
    end
    s.table(:name=>_("Velicer's Average Squared Correlations"), :header=>[_("number of components"),_("average square correlation")]) do |t|
      fm.each_with_index do |v,i|
        t.row(["%d" % i, @errors.include?(i) ? "*" : "%0.6f" % v])
      end
    end
    # Translate the template first, then interpolate. Formatting inside
    # _() looks up the already-interpolated text, which no catalog holds.
    s.text(_("The smallest average squared correlation is : %0.6f") % minfm)
    s.text(_("The number of components is : %d") % number_of_factors)
  end
end
120
+ dirty_memoize :number_of_factors, :fm, :minfm, :eigenvalues
121
+
122
+ end
123
+ end
124
+ end