statsample-ekatena 2.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (156) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.travis.yml +23 -0
  4. data/CONTRIBUTING.md +17 -0
  5. data/Gemfile +2 -0
  6. data/History.txt +457 -0
  7. data/LICENSE.txt +12 -0
  8. data/README.md +175 -0
  9. data/Rakefile +44 -0
  10. data/benchmarks/correlation_matrix_15_variables.rb +32 -0
  11. data/benchmarks/correlation_matrix_5_variables.rb +33 -0
  12. data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
  13. data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
  14. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +71 -0
  15. data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
  16. data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
  17. data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
  18. data/benchmarks/correlation_matrix_methods/results.ds +0 -0
  19. data/benchmarks/factor_map.rb +37 -0
  20. data/benchmarks/helpers_benchmark.rb +5 -0
  21. data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
  22. data/doc_latex/manual/equations.tex +78 -0
  23. data/examples/boxplot.rb +28 -0
  24. data/examples/chisquare_test.rb +23 -0
  25. data/examples/correlation_matrix.rb +32 -0
  26. data/examples/dataset.rb +30 -0
  27. data/examples/dominance_analysis.rb +33 -0
  28. data/examples/dominance_analysis_bootstrap.rb +32 -0
  29. data/examples/histogram.rb +26 -0
  30. data/examples/icc.rb +24 -0
  31. data/examples/levene.rb +29 -0
  32. data/examples/multiple_regression.rb +20 -0
  33. data/examples/multivariate_correlation.rb +33 -0
  34. data/examples/parallel_analysis.rb +40 -0
  35. data/examples/polychoric.rb +40 -0
  36. data/examples/principal_axis.rb +26 -0
  37. data/examples/reliability.rb +31 -0
  38. data/examples/scatterplot.rb +25 -0
  39. data/examples/t_test.rb +27 -0
  40. data/examples/tetrachoric.rb +17 -0
  41. data/examples/u_test.rb +24 -0
  42. data/examples/vector.rb +20 -0
  43. data/examples/velicer_map_test.rb +46 -0
  44. data/grab_references.rb +29 -0
  45. data/lib/spss.rb +134 -0
  46. data/lib/statsample-ekatena/analysis.rb +100 -0
  47. data/lib/statsample-ekatena/analysis/suite.rb +89 -0
  48. data/lib/statsample-ekatena/analysis/suitereportbuilder.rb +44 -0
  49. data/lib/statsample-ekatena/anova.rb +24 -0
  50. data/lib/statsample-ekatena/anova/contrast.rb +79 -0
  51. data/lib/statsample-ekatena/anova/oneway.rb +187 -0
  52. data/lib/statsample-ekatena/anova/twoway.rb +207 -0
  53. data/lib/statsample-ekatena/bivariate.rb +406 -0
  54. data/lib/statsample-ekatena/bivariate/pearson.rb +54 -0
  55. data/lib/statsample-ekatena/codification.rb +182 -0
  56. data/lib/statsample-ekatena/converter/csv.rb +28 -0
  57. data/lib/statsample-ekatena/converter/spss.rb +48 -0
  58. data/lib/statsample-ekatena/converters.rb +211 -0
  59. data/lib/statsample-ekatena/crosstab.rb +188 -0
  60. data/lib/statsample-ekatena/daru.rb +115 -0
  61. data/lib/statsample-ekatena/dataset.rb +10 -0
  62. data/lib/statsample-ekatena/dominanceanalysis.rb +425 -0
  63. data/lib/statsample-ekatena/dominanceanalysis/bootstrap.rb +232 -0
  64. data/lib/statsample-ekatena/factor.rb +104 -0
  65. data/lib/statsample-ekatena/factor/map.rb +124 -0
  66. data/lib/statsample-ekatena/factor/parallelanalysis.rb +166 -0
  67. data/lib/statsample-ekatena/factor/pca.rb +242 -0
  68. data/lib/statsample-ekatena/factor/principalaxis.rb +243 -0
  69. data/lib/statsample-ekatena/factor/rotation.rb +198 -0
  70. data/lib/statsample-ekatena/formula/fit_model.rb +46 -0
  71. data/lib/statsample-ekatena/formula/formula.rb +306 -0
  72. data/lib/statsample-ekatena/graph.rb +11 -0
  73. data/lib/statsample-ekatena/graph/boxplot.rb +236 -0
  74. data/lib/statsample-ekatena/graph/histogram.rb +198 -0
  75. data/lib/statsample-ekatena/graph/scatterplot.rb +213 -0
  76. data/lib/statsample-ekatena/histogram.rb +180 -0
  77. data/lib/statsample-ekatena/matrix.rb +329 -0
  78. data/lib/statsample-ekatena/multiset.rb +310 -0
  79. data/lib/statsample-ekatena/regression.rb +65 -0
  80. data/lib/statsample-ekatena/regression/multiple.rb +89 -0
  81. data/lib/statsample-ekatena/regression/multiple/alglibengine.rb +128 -0
  82. data/lib/statsample-ekatena/regression/multiple/baseengine.rb +251 -0
  83. data/lib/statsample-ekatena/regression/multiple/gslengine.rb +129 -0
  84. data/lib/statsample-ekatena/regression/multiple/matrixengine.rb +205 -0
  85. data/lib/statsample-ekatena/regression/multiple/rubyengine.rb +86 -0
  86. data/lib/statsample-ekatena/regression/simple.rb +121 -0
  87. data/lib/statsample-ekatena/reliability.rb +150 -0
  88. data/lib/statsample-ekatena/reliability/icc.rb +415 -0
  89. data/lib/statsample-ekatena/reliability/multiscaleanalysis.rb +181 -0
  90. data/lib/statsample-ekatena/reliability/scaleanalysis.rb +233 -0
  91. data/lib/statsample-ekatena/reliability/skillscaleanalysis.rb +114 -0
  92. data/lib/statsample-ekatena/resample.rb +15 -0
  93. data/lib/statsample-ekatena/shorthand.rb +125 -0
  94. data/lib/statsample-ekatena/srs.rb +169 -0
  95. data/lib/statsample-ekatena/test.rb +82 -0
  96. data/lib/statsample-ekatena/test/bartlettsphericity.rb +45 -0
  97. data/lib/statsample-ekatena/test/chisquare.rb +73 -0
  98. data/lib/statsample-ekatena/test/f.rb +52 -0
  99. data/lib/statsample-ekatena/test/kolmogorovsmirnov.rb +63 -0
  100. data/lib/statsample-ekatena/test/levene.rb +88 -0
  101. data/lib/statsample-ekatena/test/t.rb +309 -0
  102. data/lib/statsample-ekatena/test/umannwhitney.rb +208 -0
  103. data/lib/statsample-ekatena/test/wilcoxonsignedrank.rb +90 -0
  104. data/lib/statsample-ekatena/vector.rb +19 -0
  105. data/lib/statsample-ekatena/version.rb +3 -0
  106. data/lib/statsample.rb +282 -0
  107. data/po/es/statsample.mo +0 -0
  108. data/po/es/statsample.po +959 -0
  109. data/po/statsample.pot +947 -0
  110. data/references.txt +24 -0
  111. data/statsample-ekatena.gemspec +49 -0
  112. data/test/fixtures/bank2.dat +200 -0
  113. data/test/fixtures/correlation_matrix.rb +17 -0
  114. data/test/fixtures/df.csv +15 -0
  115. data/test/fixtures/hartman_23.matrix +9 -0
  116. data/test/fixtures/stock_data.csv +500 -0
  117. data/test/fixtures/tetmat_matrix.txt +5 -0
  118. data/test/fixtures/tetmat_test.txt +1001 -0
  119. data/test/helpers_tests.rb +83 -0
  120. data/test/test_analysis.rb +176 -0
  121. data/test/test_anova_contrast.rb +36 -0
  122. data/test/test_anovaoneway.rb +26 -0
  123. data/test/test_anovatwoway.rb +37 -0
  124. data/test/test_anovatwowaywithdataset.rb +47 -0
  125. data/test/test_anovawithvectors.rb +102 -0
  126. data/test/test_awesome_print_bug.rb +16 -0
  127. data/test/test_bartlettsphericity.rb +25 -0
  128. data/test/test_bivariate.rb +164 -0
  129. data/test/test_codification.rb +78 -0
  130. data/test/test_crosstab.rb +67 -0
  131. data/test/test_dominance_analysis.rb +39 -0
  132. data/test/test_factor.rb +228 -0
  133. data/test/test_factor_map.rb +38 -0
  134. data/test/test_factor_pa.rb +56 -0
  135. data/test/test_fit_model.rb +88 -0
  136. data/test/test_ggobi.rb +35 -0
  137. data/test/test_gsl.rb +15 -0
  138. data/test/test_histogram.rb +109 -0
  139. data/test/test_matrix.rb +48 -0
  140. data/test/test_multiset.rb +176 -0
  141. data/test/test_regression.rb +231 -0
  142. data/test/test_reliability.rb +223 -0
  143. data/test/test_reliability_icc.rb +198 -0
  144. data/test/test_reliability_skillscale.rb +57 -0
  145. data/test/test_resample.rb +24 -0
  146. data/test/test_srs.rb +9 -0
  147. data/test/test_statistics.rb +69 -0
  148. data/test/test_stest.rb +69 -0
  149. data/test/test_stratified.rb +17 -0
  150. data/test/test_test_f.rb +33 -0
  151. data/test/test_test_kolmogorovsmirnov.rb +34 -0
  152. data/test/test_test_t.rb +62 -0
  153. data/test/test_umannwhitney.rb +27 -0
  154. data/test/test_vector.rb +12 -0
  155. data/test/test_wilcoxonsignedrank.rb +64 -0
  156. metadata +570 -0
@@ -0,0 +1,232 @@
1
+ module Statsample
2
+ class DominanceAnalysis
3
+ # == Goal
4
+ # Generates bootstrap samples to identify the replicability of a Dominance Analysis. See Azen & Budescu (2003) for more information.
5
+ #
6
+ # == Usage
7
+ #
8
+ # require 'statsample'
9
+ # a = Daru::Vector.new(100.times.collect {rand})
10
+ # b = Daru::Vector.new(100.times.collect {rand})
11
+ # c = Daru::Vector.new(100.times.collect {rand})
12
+ # d = Daru::Vector.new(100.times.collect {rand})
13
+ # ds = Daru::DataFrame.new({:a => a,:b => b,:c => c,:d => d})
14
+ # ds[:y] = ds.collect_rows { |row| row[:a]*5+row[:b]*2+row[:c]*2+row[:d]*2+10*rand() }
15
+ # dab=Statsample::DominanceAnalysis::Bootstrap.new(ds, :y, :debug=>true)
16
+ # dab.bootstrap(100,nil)
17
+ # puts dab.summary
18
+ # <strong>Output</strong>
19
+ # Sample size: 100
20
+ # t: 1.98421693632958
21
+ #
22
+ # Linear Regression Engine: Statsample::Regression::Multiple::MatrixEngine
23
+ # Table: Bootstrap report
24
+ # --------------------------------------------------------------------------------------------
25
+ # | pairs | sD | Dij | SE(Dij) | Pij | Pji | Pno | Reproducibility |
26
+ # --------------------------------------------------------------------------------------------
27
+ # | Complete dominance |
28
+ # --------------------------------------------------------------------------------------------
29
+ # | a - b | 1.0 | 0.6150 | 0.454 | 0.550 | 0.320 | 0.130 | 0.550 |
30
+ # | a - c | 1.0 | 0.9550 | 0.175 | 0.930 | 0.020 | 0.050 | 0.930 |
31
+ # | a - d | 1.0 | 0.9750 | 0.131 | 0.960 | 0.010 | 0.030 | 0.960 |
32
+ # | b - c | 1.0 | 0.8800 | 0.276 | 0.820 | 0.060 | 0.120 | 0.820 |
33
+ # | b - d | 1.0 | 0.9250 | 0.193 | 0.860 | 0.010 | 0.130 | 0.860 |
34
+ # | c - d | 0.5 | 0.5950 | 0.346 | 0.350 | 0.160 | 0.490 | 0.490 |
35
+ # --------------------------------------------------------------------------------------------
36
+ # | Conditional dominance |
37
+ # --------------------------------------------------------------------------------------------
38
+ # | a - b | 1.0 | 0.6300 | 0.458 | 0.580 | 0.320 | 0.100 | 0.580 |
39
+ # | a - c | 1.0 | 0.9700 | 0.156 | 0.960 | 0.020 | 0.020 | 0.960 |
40
+ # | a - d | 1.0 | 0.9800 | 0.121 | 0.970 | 0.010 | 0.020 | 0.970 |
41
+ # | b - c | 1.0 | 0.8850 | 0.283 | 0.840 | 0.070 | 0.090 | 0.840 |
42
+ # | b - d | 1.0 | 0.9500 | 0.181 | 0.920 | 0.020 | 0.060 | 0.920 |
43
+ # | c - d | 0.5 | 0.5800 | 0.360 | 0.350 | 0.190 | 0.460 | 0.460 |
44
+ # --------------------------------------------------------------------------------------------
45
+ # | General Dominance |
46
+ # --------------------------------------------------------------------------------------------
47
+ # | a - b | 1.0 | 0.6500 | 0.479 | 0.650 | 0.350 | 0.000 | 0.650 |
48
+ # | a - c | 1.0 | 0.9800 | 0.141 | 0.980 | 0.020 | 0.000 | 0.980 |
49
+ # | a - d | 1.0 | 0.9900 | 0.100 | 0.990 | 0.010 | 0.000 | 0.990 |
50
+ # | b - c | 1.0 | 0.9000 | 0.302 | 0.900 | 0.100 | 0.000 | 0.900 |
51
+ # | b - d | 1.0 | 0.9700 | 0.171 | 0.970 | 0.030 | 0.000 | 0.970 |
52
+ # | c - d | 1.0 | 0.5600 | 0.499 | 0.560 | 0.440 | 0.000 | 0.560 |
53
+ # --------------------------------------------------------------------------------------------
54
+ #
55
+ # Table: General averages
56
+ # ---------------------------------------
57
+ # | var | mean | se | p.5 | p.95 |
58
+ # ---------------------------------------
59
+ # | a | 0.133 | 0.049 | 0.062 | 0.218 |
60
+ # | b | 0.106 | 0.048 | 0.029 | 0.199 |
61
+ # | c | 0.035 | 0.032 | 0.002 | 0.106 |
62
+ # | d | 0.023 | 0.019 | 0.002 | 0.062 |
63
+ # ---------------------------------------
64
+ #
65
+ # == References:
66
+ # * Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. <em>Psychological Methods, 8</em>(2), 129-148.
67
+ class Bootstrap
68
+ include Writable
69
+ include Summarizable
70
+ # Total Dominance results
71
+ attr_reader :samples_td
72
+ # Conditional Dominance results
73
+ attr_reader :samples_cd
74
+ # General Dominance results
75
+ attr_reader :samples_gd
76
+ # General average results
77
+ attr_reader :samples_ga
78
+ # Name of fields
79
+ attr_reader :fields
80
+ # Regression class used for analysis
81
+ attr_accessor :regression_class
82
+ # Dataset
83
+ attr_accessor :ds
84
+ # Name of analysis
85
+ attr_accessor :name
86
+ # Alpha level of confidence. Default: ALPHA
87
+ attr_accessor :alpha
88
+ # Debug?
89
+ attr_accessor :debug
90
+ # Default level of confidence for t calculation
91
+ ALPHA=0.95
92
+ # Create a new Dominance Analysis Bootstrap Object
93
+ #
94
+ # * ds: A Daru::DataFrame object
95
+ # * y_var: Name of dependent variable
96
+ # * opts: Any other attribute of the class
97
def initialize(ds,y_var, opts=Hash.new)
  @ds = ds
  # Scalar names are normalised to symbols; an Array of dependents is kept as given.
  @y_var = y_var.respond_to?(:to_sym) ? y_var.to_sym : y_var
  @n = ds.nrows

  @n_samples=0
  @alpha=ALPHA
  @debug=false
  if y_var.is_a? Array
    @fields=ds.vectors.to_a - y_var
    @regression_class=Regression::Multiple::MultipleDependent
  else
    # Drop the dependent variable under both its original and its
    # symbolised name, so "y" and :y exclude the same vector. The
    # original only subtracted the raw argument, which left the dependent
    # among the predictors when the two spellings differed.
    @fields=ds.vectors.to_a - [y_var, @y_var]
    @regression_class=Regression::Multiple::MatrixEngine
  end
  # One empty result list per predictor for the general averages.
  @samples_ga=@fields.each_with_object({}) { |field, store| store[field]=[] }

  @name=_("Bootstrap dominance Analysis: %s over %s") % [ ds.vectors.to_a.join(",") , @y_var]
  # Apply only options that have a writer. The original tested for the
  # reader and then called the writer, so read-only attributes such as
  # :fields raised NoMethodError; they are now ignored.
  opts.each do |key,value|
    writer="#{key}="
    send(writer,value) if respond_to?(writer)
  end
  create_samples_pairs
end
121
+ # lr_class deprecated
122
+ alias_method :lr_class, :regression_class
123
# Dominance analysis of the original (non-resampled) dataset. Built on
# first use with the current regression class and cached in @da afterwards.
def da
  @da ||= DominanceAnalysis.new(@ds, @y_var, :regression_class => @regression_class)
end
129
+ # Creates n re-samples from the original dataset and stores the result of
130
+ # each sample in @samples_td, @samples_cd, @samples_gd, @samples_ga
131
+ #
132
+ # * number_samples: Number of new samples to add
133
+ # * n: size of each new sample. If nil, equal to original sample size
134
def bootstrap(number_samples,n=nil)
  number_samples.times do |iteration|
    @n_samples+=1
    puts _("Bootstrap %d of %d") % [iteration+1, number_samples] if @debug
    resample=@ds.bootstrap(n)
    analysis=DominanceAnalysis.new(resample, @y_var, :regression_class => @regression_class)
    # Result reader on the analysis => store that accumulates it, listed
    # in the order the results are collected.
    collectors={
      :total_dominance => @samples_td,
      :conditional_dominance => @samples_cd,
      :general_dominance => @samples_gd,
      :general_averages => @samples_ga
    }
    collectors.each do |reader, store|
      analysis.public_send(reader).each { |key, value| store[key].push(value) }
    end
  end
end
155
# Rebuilds @pairs (every unordered pair of predictor names, in field order)
# and resets the total, conditional and general dominance stores so that
# each holds an empty result list per pair.
def create_samples_pairs
  @samples_td={}
  @samples_cd={}
  @samples_gd={}
  # Array#combination pairs elements by position, so this yields the same
  # pairs, in the same order, as combining indices and mapping them back to
  # @fields. The stray debugging `p` that printed every pair is gone.
  @pairs=@fields.combination(2).to_a
  @pairs.each do |pair|
    # A fresh array per store: results must never alias across stores.
    [@samples_td, @samples_cd, @samples_gd].each { |store| store[pair]=[] }
  end
end
170
# Value returned by Distribution::T.p_value for the cumulative probability
# 1 - (1 - @alpha) / 2 with @n_samples - 1 degrees of freedom.
# NOTE(review): presumably the two-sided critical t for confidence level
# @alpha - confirm against the Distribution gem.
def t
  cumulative_probability=1-((1-@alpha) / 2)
  degrees_of_freedom=@n_samples - 1
  Distribution::T.p_value(cumulative_probability, degrees_of_freedom)
end
173
# Writes the bootstrap report into +builder+: three text lines, a pairwise
# table with one section per dominance type, and a table of general
# averages per predictor. Raises if no bootstrap sample was drawn yet.
def report_building(builder) # :nodoc:
  raise "You should bootstrap first" if @n_samples==0
  builder.section(:name=>@name) do |generator|
    generator.text _("Sample size: %d\n") % @n_samples
    generator.text "t: #{t}\n"
    generator.text _("Linear Regression Engine: %s") % @regression_class.name

    pairs_table=ReportBuilder::Table.new(:name=>"Bootstrap report", :header => [_("pairs"), "sD","Dij", _("SE(Dij)"), "Pij", "Pji", "Pno", _("Reproducibility")])
    # [section title, bootstrap store, pairwise query on the full-sample analysis]
    sections=[
      [_("Complete dominance"), @samples_td, :total_dominance_pairwise],
      [_("Conditional dominance"), @samples_cd, :conditional_dominance_pairwise],
      [_("General Dominance"), @samples_gd, :general_dominance_pairwise]
    ]
    sections.each_with_index do |(title, store, pairwise), position|
      # A rule separates consecutive sections; none precedes the first one.
      pairs_table.hr unless position.zero?
      pairs_table.row([title,"","","","","","",""])
      pairs_table.hr
      @pairs.each do |pair|
        sampled=Daru::Vector.new(store[pair])
        observed=da.public_send(pairwise, pair[0], pair[1])
        pairs_table.row(summary_pairs(pair, sampled, observed))
      end
    end
    generator.parse_element(pairs_table)

    averages_table=ReportBuilder::Table.new(:name=>_("General averages"), :header=>[_("var"), _("mean"), _("se"), _("p.5"), _("p.95")])
    @fields.each do |field|
      values=Daru::Vector.new(@samples_ga[field])
      label=@ds[field].name
      statistics=[values.mean, values.sd, values.percentil(5), values.percentil(95)]
      averages_table.row([label] + statistics.map { |statistic| sprintf("%0.3f", statistic) })
    end

    generator.parse_element(averages_table)
  end
end
218
# Builds one report row for +pair+: label, full-sample dominance value
# (+ttd+), mean and standard deviation of the bootstrap values (+std+), the
# proportions of bootstrap values equal to 1, 0 and 0.5, and the proportion
# equal to +ttd+ itself.
def summary_pairs(pair,std,ttd)
  proportions=std.proportions
  # Outcomes that never occurred still need a printable proportion.
  [0, 0.5, 1].each do |outcome|
    proportions[outcome]=0 if proportions[outcome].nil?
  end
  first_name=@ds[pair[0]].name
  second_name=@ds[pair[1]].name
  label="%s - %s" % [first_name, second_name]
  leading=[label, f(ttd,1), f(std.mean,4), f(std.sd)]
  leading + [1, 0, 0.5, ttd].map { |outcome| f(proportions[outcome]) }
end
226
# Formats +v+ as a fixed-point string with +n+ decimals (three by default).
def f(v,n=3)
  format("%0.#{n}f", v)
end
230
+ end
231
+ end
232
+ end
@@ -0,0 +1,104 @@
1
+ require 'statsample/factor/rotation'
2
+ require 'statsample/factor/pca'
3
+ require 'statsample/factor/principalaxis'
4
+ require 'statsample/factor/parallelanalysis'
5
+ require 'statsample/factor/map'
6
+
7
+ module Statsample
8
+ # Factor Analysis toolbox.
9
+ # * Classes for Extraction of factors:
10
+ # * Statsample::Factor::PCA
11
+ # * Statsample::Factor::PrincipalAxis
12
+ # * Classes for Rotation of factors:
13
+ # * Statsample::Factor::Varimax
14
+ # * Statsample::Factor::Equimax
15
+ # * Statsample::Factor::Quartimax
16
+ # * Classes for determining the number of components
17
+ # * Statsample::Factor::MAP
18
+ # * Statsample::Factor::ParallelAnalysis
19
+ #
20
+ # About the number of components, O'Connor (2000) said:
21
+ # The two procedures [PA and MAP ] complement each other nicely,
22
+ # in that the MAP tends to err (when it does err) in the direction
23
+ # of underextraction, whereas parallel analysis tends to err
24
+ # (when it does err) in the direction of overextraction.
25
+ # Optimal decisions are thus likely to be made after considering
26
+ # the results of both analytic procedures. (p.10)
27
+
28
+ module Factor
29
+ # Anti-image covariance matrix.
30
+ # Useful for inspecting the desirability of data for factor analysis.
31
+ # According to Dziuban & Shirkey (1974, p.359):
32
+ # "If this matrix does not exhibit many zero off-diagonal elements,
33
+ # the investigator has evidence that the correlation
34
+ # matrix is not appropriate for factor analysis."
35
+ #
36
def self.anti_image_covariance_matrix(matrix)
  # Invert once and reuse the result; the original inverted +matrix+ twice.
  inverse=matrix.inverse
  # s2 is the inverse of the diagonal matrix built from the diagonal of
  # the inverse.
  s2=Matrix.diagonal(*inverse.diagonal).inverse
  aicm=s2*inverse*s2
  aicm.extend(Statsample::CovariateMatrix)
  # Carry field names over when the input exposes them.
  aicm.fields=matrix.fields if matrix.respond_to? :fields
  aicm
end
43
# Anti-image correlation matrix of +matrix+: the inverse of +matrix+
# multiplied on both sides by S, where S is the inverse of the square root
# of the diagonal matrix taken from that inverse.
#
# +matrix+ is converted with #to_matrix first. The result is extended with
# Statsample::CovariateMatrix and receives the converted matrix's fields
# when it exposes any.
def self.anti_image_correlation_matrix(matrix)
  matrix=matrix.to_matrix
  # Invert once and reuse the result; the original inverted twice.
  inverse=matrix.inverse
  s=Matrix.diagonal(*inverse.diagonal).sqrt.inverse
  aicm=s*inverse*s

  aicm.extend(Statsample::CovariateMatrix)
  aicm.fields=matrix.fields if matrix.respond_to? :fields
  aicm
end
52
+
53
+ # Kaiser-Meyer-Olkin measure of sampling adequacy for correlation matrix.
54
+ #
55
+ # Kaiser's (1974, cited in Dziuban & Shirkey, 1974) calibration of the index is as follows:
56
+ # * .90s—marvelous
57
+ # * .80s— meritorious
58
+ # * .70s—middling
59
+ # * .60s—mediocre
60
+ # * .50s—miserable
61
+ # * below .50—unacceptable
62
def self.kmo(matrix)
  anti_image=anti_image_correlation_matrix(matrix)
  size=matrix.row_size
  squared_correlations=0
  squared_anti_image=0
  # Accumulate squared off-diagonal entries of both matrices, row by row.
  (0...size).each do |row|
    (0...size).each do |column|
      next if row==column
      squared_correlations+=matrix[row,column]**2
      squared_anti_image+=anti_image[row,column]**2
    end
  end
  squared_correlations.quo(squared_correlations+squared_anti_image)
end
76
+ # Kaiser-Meyer-Olkin measure of sampling adequacy for one variable.
77
+ #
78
def self.kmo_univariate(matrix, var)
  # A String or Symbol is a field name; anything else is used as the row
  # index directly. The original only recognised Strings, so a Symbol name
  # was used as an index and failed.
  if var.is_a?(String) || var.is_a?(Symbol)
    raise "Matrix doesn't respond to fields" unless matrix.respond_to? :fields
    fields=matrix.fields
    # Exact match first; otherwise match across String/Symbol spellings.
    j=fields.index(var) || fields.index { |field| field.to_s==var.to_s }
    raise "Matrix doesn't have field #{var}" if j.nil?
  else
    j=var
  end

  q=anti_image_correlation_matrix(matrix)
  n=matrix.row_size

  sum_r,sum_q=0,0

  # Squared entries of row j, skipping the diagonal, from both matrices.
  n.times do |k|
    next if j==k
    sum_r+=matrix[j,k]**2
    sum_q+=q[j,k]**2
  end
  sum_r.quo(sum_r+sum_q)
end
103
+ end
104
+ end
@@ -0,0 +1,124 @@
1
+ module Statsample
2
+ module Factor
3
+ # = Velicer's Minimum Average Partial
4
+ #
5
+ # "Velicer’s (1976) MAP test involves a complete princi-
6
+ # pal components analysis followed by the examination of
7
+ # a series of matrices of partial correlations. Specifically,
8
+ # on the first step, the first principal component is par-
9
+ # tialed out of the correlations between the variables of in-
10
+ # terest, and the average squared coefficient in the off-
11
+ # diagonals of the resulting partial correlation matrix is
12
+ # computed. On the second step, the first two principal
13
+ # components are partialed out of the original correlation
14
+ # matrix and the average squared partial correlation is
15
+ # again computed. These computations are conducted for k
16
+ # (the number of variables) minus one steps. The average
17
+ # squared partial correlations from these steps are then
18
+ # lined up, and the number of components is determined by
19
+ # the step number in the analyses that resulted in the lowest
20
+ # average squared partial correlation. The average squared
21
+ # coefficient in the original correlation matrix is also com-
22
+ # puted, and if this coefficient happens to be lower than
23
+ # the lowest average squared partial correlation, then no
24
+ # components should be extracted from the correlation ma-
25
+ # trix. Statistically, components are retained as long as the
26
+ # variance in the correlation matrix represents systematic
27
+ # variance. Components are no longer retained when there
28
+ # is proportionately more unsystematic variance than sys-
29
+ # tematic variance." (O'Connor, 2000, p.397).
30
+ #
31
+ # Current algorithm is loosely based on SPSS O'Connor algorithm
32
+ #
33
+ # == Reference
34
+ # * O'Connor, B. (2000). SPSS and SAS programs for determining the number of components using parallel analysis and Velicer's MAP test. Behavior Research Methods, Instruments, & Computers, 32(3), 396-402.
35
+ #
36
+
37
+
38
+
39
+ class MAP
40
+ include Summarizable
41
+ include DirtyMemoize
42
+ # Name of analysis
43
+ attr_accessor :name
44
+ attr_reader :eigenvalues
45
+ # Number of factors to retain
46
+ attr_reader :number_of_factors
47
+ # Average squared correlations
48
+ attr_reader :fm
49
+ # Smallest average squared correlation
50
+ attr_reader :minfm
51
+
52
+ attr_accessor :use_gsl
53
# Builds a new analysis from +ds+ by passing its correlation matrix, along
# with +opts+, to .new.
def self.with_dataset(ds,opts=Hash.new)
  correlations=ds.correlation_matrix
  new(correlations, opts)
end
56
# Stores +matrix+ and applies the options. Defaults are :use_gsl => true
# and a translated "Velicer's MAP" name; values in +opts+ override them.
def initialize(matrix, opts=Hash.new)
  @matrix=matrix
  defaults={
    :use_gsl=>true,
    :name=>_("Velicer's MAP")
  }
  @opts=defaults.merge(opts)
  # Only the known options go through their writers; any extra keys in
  # +opts+ are kept in @opts but not applied.
  defaults.each_key { |option| send("#{option}=", @opts[option]) }
end
65
# Computes the eigenvalues, the average squared (partial) correlation for
# every number of partialled-out components (@fm), the smallest of those
# values (@minfm) and the step at which it occurs (@number_of_factors).
# Steps whose value is Complex are recorded in @errors and skipped when
# searching for the minimum.
def compute
  # The GSL code path is only valid when it was requested AND GSL is
  # available. The original sliced the loadings with GSL range indexing
  # whenever use_gsl was set, even after falling back to a plain Matrix.
  gsl_active=use_gsl && Statsample.has_gsl?
  gsl_m=gsl_active ? @matrix.to_gsl : @matrix
  klass_m=gsl_m.class
  eigvect,@eigenvalues=gsl_m.eigenvectors_matrix, gsl_m.eigenvalues
  eigenvalues_sqrt=@eigenvalues.collect {|v| Math.sqrt(v)}
  # Loadings: eigenvectors scaled by the square roots of their eigenvalues.
  loadings=eigvect*(klass_m.diagonal(*eigenvalues_sqrt))
  fm=Array.new(@matrix.row_size)
  ncol=@matrix.column_size

  # Step 0: the original matrix, no component partialled out.
  # NOTE(review): mssq is presumably the sum of squared elements, so
  # subtracting ncol removes a unit diagonal - confirm against the matrix
  # extension.
  fm[0]=(gsl_m.mssq - ncol).quo(ncol*(ncol-1))

  (ncol-1).times do |m|
    puts "MAP:Eigenvalue #{m+1}" if $DEBUG
    # First m+1 columns of the loadings, using whichever API the matrix
    # class actually offers.
    a=if gsl_active
      loadings[0..(loadings.row_size-1),0..m]
    else
      loadings.minor(0..(loadings.row_size-1),0..m)
    end
    partcov= gsl_m - (a*a.transpose)

    # Rescale the partial covariances so the diagonal becomes one.
    d=klass_m.diagonal(*(partcov.diagonal.collect {|v| Math::sqrt(1/v)}))
    pr=d*partcov*d
    fm[m+1]=(pr.mssq-ncol).quo(ncol*(ncol-1))
  end
  minfm=fm[0]
  nfactors=0
  @errors=[]
  fm.each_with_index do |v,s|
    if defined?(Complex) and v.is_a? ::Complex
      @errors.push(s)
    else
      if v < minfm
        minfm=v
        nfactors=s
      end
    end
  end
  @number_of_factors=nfactors
  @fm=fm
  @minfm=minfm

end
104
# Writes the MAP report into +g+: a table of eigenvalues, a table of the
# average squared correlation per number of components, and two summary
# lines. Entries whose index is listed in @errors are shown as "*".
def report_building(g) #:nodoc:
  g.section(:name=>@name) do |s|
    s.table(:name=>_("Eigenvalues"),:header=>[_("Value")]) do |t|
      eigenvalues.each_with_index do |e,i|
        t.row([@errors.include?(i) ? "*" : "%0.6f" % e])
      end
    end
    s.table(:name=>_("Velicer's Average Squared Correlations"), :header=>[_("number of components"),_("average square correlation")]) do |t|
      fm.each_with_index do |v,i|
        t.row(["%d" % i, @errors.include?(i) ? "*" : "%0.6f" % v])
      end
    end
    # Translate the template first, then interpolate. The original
    # formatted the number into the string before calling _(), so the
    # lookup was done on the already-filled text instead of the msgid.
    s.text(_("The smallest average squared correlation is : %0.6f") % minfm)
    s.text(_("The number of components is : %d") % number_of_factors)
  end
end
120
+ dirty_memoize :number_of_factors, :fm, :minfm, :eigenvalues
121
+
122
+ end
123
+ end
124
+ end