statsample-ekatena 2.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.travis.yml +23 -0
- data/CONTRIBUTING.md +17 -0
- data/Gemfile +2 -0
- data/History.txt +457 -0
- data/LICENSE.txt +12 -0
- data/README.md +175 -0
- data/Rakefile +44 -0
- data/benchmarks/correlation_matrix_15_variables.rb +32 -0
- data/benchmarks/correlation_matrix_5_variables.rb +33 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +71 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
- data/benchmarks/correlation_matrix_methods/results.ds +0 -0
- data/benchmarks/factor_map.rb +37 -0
- data/benchmarks/helpers_benchmark.rb +5 -0
- data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
- data/doc_latex/manual/equations.tex +78 -0
- data/examples/boxplot.rb +28 -0
- data/examples/chisquare_test.rb +23 -0
- data/examples/correlation_matrix.rb +32 -0
- data/examples/dataset.rb +30 -0
- data/examples/dominance_analysis.rb +33 -0
- data/examples/dominance_analysis_bootstrap.rb +32 -0
- data/examples/histogram.rb +26 -0
- data/examples/icc.rb +24 -0
- data/examples/levene.rb +29 -0
- data/examples/multiple_regression.rb +20 -0
- data/examples/multivariate_correlation.rb +33 -0
- data/examples/parallel_analysis.rb +40 -0
- data/examples/polychoric.rb +40 -0
- data/examples/principal_axis.rb +26 -0
- data/examples/reliability.rb +31 -0
- data/examples/scatterplot.rb +25 -0
- data/examples/t_test.rb +27 -0
- data/examples/tetrachoric.rb +17 -0
- data/examples/u_test.rb +24 -0
- data/examples/vector.rb +20 -0
- data/examples/velicer_map_test.rb +46 -0
- data/grab_references.rb +29 -0
- data/lib/spss.rb +134 -0
- data/lib/statsample-ekatena/analysis.rb +100 -0
- data/lib/statsample-ekatena/analysis/suite.rb +89 -0
- data/lib/statsample-ekatena/analysis/suitereportbuilder.rb +44 -0
- data/lib/statsample-ekatena/anova.rb +24 -0
- data/lib/statsample-ekatena/anova/contrast.rb +79 -0
- data/lib/statsample-ekatena/anova/oneway.rb +187 -0
- data/lib/statsample-ekatena/anova/twoway.rb +207 -0
- data/lib/statsample-ekatena/bivariate.rb +406 -0
- data/lib/statsample-ekatena/bivariate/pearson.rb +54 -0
- data/lib/statsample-ekatena/codification.rb +182 -0
- data/lib/statsample-ekatena/converter/csv.rb +28 -0
- data/lib/statsample-ekatena/converter/spss.rb +48 -0
- data/lib/statsample-ekatena/converters.rb +211 -0
- data/lib/statsample-ekatena/crosstab.rb +188 -0
- data/lib/statsample-ekatena/daru.rb +115 -0
- data/lib/statsample-ekatena/dataset.rb +10 -0
- data/lib/statsample-ekatena/dominanceanalysis.rb +425 -0
- data/lib/statsample-ekatena/dominanceanalysis/bootstrap.rb +232 -0
- data/lib/statsample-ekatena/factor.rb +104 -0
- data/lib/statsample-ekatena/factor/map.rb +124 -0
- data/lib/statsample-ekatena/factor/parallelanalysis.rb +166 -0
- data/lib/statsample-ekatena/factor/pca.rb +242 -0
- data/lib/statsample-ekatena/factor/principalaxis.rb +243 -0
- data/lib/statsample-ekatena/factor/rotation.rb +198 -0
- data/lib/statsample-ekatena/formula/fit_model.rb +46 -0
- data/lib/statsample-ekatena/formula/formula.rb +306 -0
- data/lib/statsample-ekatena/graph.rb +11 -0
- data/lib/statsample-ekatena/graph/boxplot.rb +236 -0
- data/lib/statsample-ekatena/graph/histogram.rb +198 -0
- data/lib/statsample-ekatena/graph/scatterplot.rb +213 -0
- data/lib/statsample-ekatena/histogram.rb +180 -0
- data/lib/statsample-ekatena/matrix.rb +329 -0
- data/lib/statsample-ekatena/multiset.rb +310 -0
- data/lib/statsample-ekatena/regression.rb +65 -0
- data/lib/statsample-ekatena/regression/multiple.rb +89 -0
- data/lib/statsample-ekatena/regression/multiple/alglibengine.rb +128 -0
- data/lib/statsample-ekatena/regression/multiple/baseengine.rb +251 -0
- data/lib/statsample-ekatena/regression/multiple/gslengine.rb +129 -0
- data/lib/statsample-ekatena/regression/multiple/matrixengine.rb +205 -0
- data/lib/statsample-ekatena/regression/multiple/rubyengine.rb +86 -0
- data/lib/statsample-ekatena/regression/simple.rb +121 -0
- data/lib/statsample-ekatena/reliability.rb +150 -0
- data/lib/statsample-ekatena/reliability/icc.rb +415 -0
- data/lib/statsample-ekatena/reliability/multiscaleanalysis.rb +181 -0
- data/lib/statsample-ekatena/reliability/scaleanalysis.rb +233 -0
- data/lib/statsample-ekatena/reliability/skillscaleanalysis.rb +114 -0
- data/lib/statsample-ekatena/resample.rb +15 -0
- data/lib/statsample-ekatena/shorthand.rb +125 -0
- data/lib/statsample-ekatena/srs.rb +169 -0
- data/lib/statsample-ekatena/test.rb +82 -0
- data/lib/statsample-ekatena/test/bartlettsphericity.rb +45 -0
- data/lib/statsample-ekatena/test/chisquare.rb +73 -0
- data/lib/statsample-ekatena/test/f.rb +52 -0
- data/lib/statsample-ekatena/test/kolmogorovsmirnov.rb +63 -0
- data/lib/statsample-ekatena/test/levene.rb +88 -0
- data/lib/statsample-ekatena/test/t.rb +309 -0
- data/lib/statsample-ekatena/test/umannwhitney.rb +208 -0
- data/lib/statsample-ekatena/test/wilcoxonsignedrank.rb +90 -0
- data/lib/statsample-ekatena/vector.rb +19 -0
- data/lib/statsample-ekatena/version.rb +3 -0
- data/lib/statsample.rb +282 -0
- data/po/es/statsample.mo +0 -0
- data/po/es/statsample.po +959 -0
- data/po/statsample.pot +947 -0
- data/references.txt +24 -0
- data/statsample-ekatena.gemspec +49 -0
- data/test/fixtures/bank2.dat +200 -0
- data/test/fixtures/correlation_matrix.rb +17 -0
- data/test/fixtures/df.csv +15 -0
- data/test/fixtures/hartman_23.matrix +9 -0
- data/test/fixtures/stock_data.csv +500 -0
- data/test/fixtures/tetmat_matrix.txt +5 -0
- data/test/fixtures/tetmat_test.txt +1001 -0
- data/test/helpers_tests.rb +83 -0
- data/test/test_analysis.rb +176 -0
- data/test/test_anova_contrast.rb +36 -0
- data/test/test_anovaoneway.rb +26 -0
- data/test/test_anovatwoway.rb +37 -0
- data/test/test_anovatwowaywithdataset.rb +47 -0
- data/test/test_anovawithvectors.rb +102 -0
- data/test/test_awesome_print_bug.rb +16 -0
- data/test/test_bartlettsphericity.rb +25 -0
- data/test/test_bivariate.rb +164 -0
- data/test/test_codification.rb +78 -0
- data/test/test_crosstab.rb +67 -0
- data/test/test_dominance_analysis.rb +39 -0
- data/test/test_factor.rb +228 -0
- data/test/test_factor_map.rb +38 -0
- data/test/test_factor_pa.rb +56 -0
- data/test/test_fit_model.rb +88 -0
- data/test/test_ggobi.rb +35 -0
- data/test/test_gsl.rb +15 -0
- data/test/test_histogram.rb +109 -0
- data/test/test_matrix.rb +48 -0
- data/test/test_multiset.rb +176 -0
- data/test/test_regression.rb +231 -0
- data/test/test_reliability.rb +223 -0
- data/test/test_reliability_icc.rb +198 -0
- data/test/test_reliability_skillscale.rb +57 -0
- data/test/test_resample.rb +24 -0
- data/test/test_srs.rb +9 -0
- data/test/test_statistics.rb +69 -0
- data/test/test_stest.rb +69 -0
- data/test/test_stratified.rb +17 -0
- data/test/test_test_f.rb +33 -0
- data/test/test_test_kolmogorovsmirnov.rb +34 -0
- data/test/test_test_t.rb +62 -0
- data/test/test_umannwhitney.rb +27 -0
- data/test/test_vector.rb +12 -0
- data/test/test_wilcoxonsignedrank.rb +64 -0
- metadata +570 -0
@@ -0,0 +1,232 @@
|
|
1
|
+
module Statsample
|
2
|
+
class DominanceAnalysis
|
3
|
+
# == Goal
|
4
|
+
# Generates bootstrap samples to assess the replicability of a Dominance Analysis. See Azen & Budescu (2003) for more information.
|
5
|
+
#
|
6
|
+
# == Usage
|
7
|
+
#
|
8
|
+
# require 'statsample'
|
9
|
+
# a = Daru::Vector.new(100.times.collect {rand})
|
10
|
+
# b = Daru::Vector.new(100.times.collect {rand})
|
11
|
+
# c = Daru::Vector.new(100.times.collect {rand})
|
12
|
+
# d = Daru::Vector.new(100.times.collect {rand})
|
13
|
+
# ds = Daru::DataFrame.new({:a => a,:b => b,:c => c,:d => d})
|
14
|
+
# ds[:y] = ds.collect_rows { |row| row[:a]*5+row[:b]*2+row[:c]*2+row[:d]*2+10*rand() }
|
15
|
+
# dab=Statsample::DominanceAnalysis::Bootstrap.new(ds, :y, :debug=>true)
|
16
|
+
# dab.bootstrap(100,nil)
|
17
|
+
# puts dab.summary
|
18
|
+
# <strong>Output</strong>
|
19
|
+
# Sample size: 100
|
20
|
+
# t: 1.98421693632958
|
21
|
+
#
|
22
|
+
# Linear Regression Engine: Statsample::Regression::Multiple::MatrixEngine
|
23
|
+
# Table: Bootstrap report
|
24
|
+
# --------------------------------------------------------------------------------------------
|
25
|
+
# | pairs | sD | Dij | SE(Dij) | Pij | Pji | Pno | Reproducibility |
|
26
|
+
# --------------------------------------------------------------------------------------------
|
27
|
+
# | Complete dominance |
|
28
|
+
# --------------------------------------------------------------------------------------------
|
29
|
+
# | a - b | 1.0 | 0.6150 | 0.454 | 0.550 | 0.320 | 0.130 | 0.550 |
|
30
|
+
# | a - c | 1.0 | 0.9550 | 0.175 | 0.930 | 0.020 | 0.050 | 0.930 |
|
31
|
+
# | a - d | 1.0 | 0.9750 | 0.131 | 0.960 | 0.010 | 0.030 | 0.960 |
|
32
|
+
# | b - c | 1.0 | 0.8800 | 0.276 | 0.820 | 0.060 | 0.120 | 0.820 |
|
33
|
+
# | b - d | 1.0 | 0.9250 | 0.193 | 0.860 | 0.010 | 0.130 | 0.860 |
|
34
|
+
# | c - d | 0.5 | 0.5950 | 0.346 | 0.350 | 0.160 | 0.490 | 0.490 |
|
35
|
+
# --------------------------------------------------------------------------------------------
|
36
|
+
# | Conditional dominance |
|
37
|
+
# --------------------------------------------------------------------------------------------
|
38
|
+
# | a - b | 1.0 | 0.6300 | 0.458 | 0.580 | 0.320 | 0.100 | 0.580 |
|
39
|
+
# | a - c | 1.0 | 0.9700 | 0.156 | 0.960 | 0.020 | 0.020 | 0.960 |
|
40
|
+
# | a - d | 1.0 | 0.9800 | 0.121 | 0.970 | 0.010 | 0.020 | 0.970 |
|
41
|
+
# | b - c | 1.0 | 0.8850 | 0.283 | 0.840 | 0.070 | 0.090 | 0.840 |
|
42
|
+
# | b - d | 1.0 | 0.9500 | 0.181 | 0.920 | 0.020 | 0.060 | 0.920 |
|
43
|
+
# | c - d | 0.5 | 0.5800 | 0.360 | 0.350 | 0.190 | 0.460 | 0.460 |
|
44
|
+
# --------------------------------------------------------------------------------------------
|
45
|
+
# | General Dominance |
|
46
|
+
# --------------------------------------------------------------------------------------------
|
47
|
+
# | a - b | 1.0 | 0.6500 | 0.479 | 0.650 | 0.350 | 0.000 | 0.650 |
|
48
|
+
# | a - c | 1.0 | 0.9800 | 0.141 | 0.980 | 0.020 | 0.000 | 0.980 |
|
49
|
+
# | a - d | 1.0 | 0.9900 | 0.100 | 0.990 | 0.010 | 0.000 | 0.990 |
|
50
|
+
# | b - c | 1.0 | 0.9000 | 0.302 | 0.900 | 0.100 | 0.000 | 0.900 |
|
51
|
+
# | b - d | 1.0 | 0.9700 | 0.171 | 0.970 | 0.030 | 0.000 | 0.970 |
|
52
|
+
# | c - d | 1.0 | 0.5600 | 0.499 | 0.560 | 0.440 | 0.000 | 0.560 |
|
53
|
+
# --------------------------------------------------------------------------------------------
|
54
|
+
#
|
55
|
+
# Table: General averages
|
56
|
+
# ---------------------------------------
|
57
|
+
# | var | mean | se | p.5 | p.95 |
|
58
|
+
# ---------------------------------------
|
59
|
+
# | a | 0.133 | 0.049 | 0.062 | 0.218 |
|
60
|
+
# | b | 0.106 | 0.048 | 0.029 | 0.199 |
|
61
|
+
# | c | 0.035 | 0.032 | 0.002 | 0.106 |
|
62
|
+
# | d | 0.023 | 0.019 | 0.002 | 0.062 |
|
63
|
+
# ---------------------------------------
|
64
|
+
#
|
65
|
+
# == References:
|
66
|
+
# * Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. <em>Psychological Methods, 8</em>(2), 129-148.
|
67
|
+
class Bootstrap
|
68
|
+
include Writable
|
69
|
+
include Summarizable
|
70
|
+
# Total Dominance results
|
71
|
+
attr_reader :samples_td
|
72
|
+
# Conditional Dominance results
|
73
|
+
attr_reader :samples_cd
|
74
|
+
# General Dominance results
|
75
|
+
attr_reader :samples_gd
|
76
|
+
# General average results
|
77
|
+
attr_reader :samples_ga
|
78
|
+
# Name of fields
|
79
|
+
attr_reader :fields
|
80
|
+
# Regression class used for analysis
|
81
|
+
attr_accessor :regression_class
|
82
|
+
# Dataset
|
83
|
+
attr_accessor :ds
|
84
|
+
# Name of analysis
|
85
|
+
attr_accessor :name
|
86
|
+
# Alpha level of confidence. Default: ALPHA
|
87
|
+
attr_accessor :alpha
|
88
|
+
# Debug?
|
89
|
+
attr_accessor :debug
|
90
|
+
# Default level of confidence for t calculation
|
91
|
+
ALPHA=0.95
|
92
|
+
# Create a new Dominance Analysis Bootstrap Object
|
93
|
+
#
|
94
|
+
# * ds: A Daru::DataFrame object
|
95
|
+
# * y_var: Name of dependent variable
|
96
|
+
# * opts: Any other attribute of the class
|
97
|
+
def initialize(ds,y_var, opts=Hash.new)
  @ds = ds
  # Names that can be symbolised are stored as Symbols; anything else
  # (for instance an Array of dependent variables) is kept as given.
  @y_var = y_var.respond_to?(:to_sym) ? y_var.to_sym : y_var
  @n = ds.nrows

  # Defaults; any of them may be overridden through +opts+ below.
  @n_samples = 0
  @alpha = ALPHA
  @debug = false

  several_dependents = y_var.is_a?(Array)
  # NOTE(review): the predictor list is built by subtracting the raw +y_var+,
  # not the normalised @y_var -- confirm this is intended when a String name
  # is passed for a dataset whose vectors are Symbols.
  dependents = several_dependents ? y_var : [y_var]
  @fields = ds.vectors.to_a - dependents
  @regression_class =
    if several_dependents
      Regression::Multiple::MultipleDependent
    else
      Regression::Multiple::MatrixEngine
    end

  # One empty list of general averages per predictor.
  @samples_ga = @fields.each_with_object({}) { |field, store| store[field] = [] }

  @name = _("Bootstrap dominance Analysis: %s over %s") % [ds.vectors.to_a.join(","), @y_var]

  # Apply only the options this object responds to; the rest are ignored.
  opts.each do |key, value|
    send("#{key}=", value) if respond_to?(key)
  end
  create_samples_pairs
end
|
121
|
+
# lr_class deprecated
|
122
|
+
alias_method :lr_class, :regression_class
|
123
|
+
# Dominance analysis of the stored dataset, built on first use with the
# current regression class and reused on later calls.
def da
  return @da unless @da.nil?

  @da = DominanceAnalysis.new(@ds, @y_var, :regression_class => @regression_class)
end
|
129
|
+
# Creates n re-samples from original dataset and store result of
|
130
|
+
# each sample on @samples_td, @samples_cd, @samples_gd, @samples_ga
|
131
|
+
#
|
132
|
+
# * number_samples: Number of new samples to add
|
133
|
+
# * n: size of each new sample. If nil, equal to original sample size
|
134
|
+
def bootstrap(number_samples,n=nil)
  # Appends every value of a result set to the list stored under its key.
  collect = lambda do |results, store|
    results.each { |key, value| store[key].push(value) }
  end

  number_samples.times do |iteration|
    @n_samples += 1
    puts _("Bootstrap %d of %d") % [iteration + 1, number_samples] if @debug

    resampled = @ds.bootstrap(n)
    analysis = DominanceAnalysis.new(resampled, @y_var, :regression_class => @regression_class)

    collect.call(analysis.total_dominance, @samples_td)
    collect.call(analysis.conditional_dominance, @samples_cd)
    collect.call(analysis.general_dominance, @samples_gd)
    collect.call(analysis.general_averages, @samples_ga)
  end
end
|
155
|
+
# Resets the pairwise sample stores.
#
# Builds @pairs with every 2-element combination of @fields (in field
# order) and gives @samples_td, @samples_cd and @samples_gd one empty
# list per pair. Nothing is written to standard output.
def create_samples_pairs
  @samples_td = {}
  @samples_cd = {}
  @samples_gd = {}
  @pairs = @fields.combination(2).to_a
  @pairs.each do |pair|
    # Each store gets its own array so results never leak between measures.
    [@samples_td, @samples_cd, @samples_gd].each { |store| store[pair] = [] }
  end
end
|
170
|
+
# Value returned by Distribution::T.p_value for the probability
# 1 - (1 - @alpha) / 2 with @n_samples - 1 degrees of freedom.
def t
  probability = 1 - ((1 - @alpha) / 2)
  degrees_of_freedom = @n_samples - 1
  Distribution::T.p_value(probability, degrees_of_freedom)
end
|
173
|
+
# Writes the bootstrap report into +builder+: a header with sample count,
# t value and regression engine, a table of pairwise results for the three
# dominance types, and a table of general averages per predictor.
# Raises if no bootstrap sample has been generated yet.
def report_building(builder) # :nodoc:
  raise "You should bootstrap first" if @n_samples==0

  builder.section(:name=>@name) do |generator|
    generator.text(_("Sample size: %d\n") % @n_samples)
    generator.text("t: #{t}\n")
    generator.text(_("Linear Regression Engine: %s") % @regression_class.name)

    table = ReportBuilder::Table.new(
      :name => "Bootstrap report",
      :header => [_("pairs"), "sD", "Dij", _("SE(Dij)"), "Pij", "Pji", "Pno", _("Reproducibility")]
    )

    # Adds a title row, a rule, and one summary row per pair. The block
    # supplies the pairwise value taken from the analysis returned by #da.
    append_rows = lambda do |title, samples, &pairwise|
      table.row([title, "", "", "", "", "", "", ""])
      table.hr
      @pairs.each do |pair|
        resampled = Daru::Vector.new(samples[pair])
        observed = pairwise.call(pair[0], pair[1])
        table.row(summary_pairs(pair, resampled, observed))
      end
    end

    append_rows.call(_("Complete dominance"), @samples_td) do |first, second|
      da.total_dominance_pairwise(first, second)
    end
    table.hr
    append_rows.call(_("Conditional dominance"), @samples_cd) do |first, second|
      da.conditional_dominance_pairwise(first, second)
    end
    table.hr
    append_rows.call(_("General Dominance"), @samples_gd) do |first, second|
      da.general_dominance_pairwise(first, second)
    end
    generator.parse_element(table)

    averages = ReportBuilder::Table.new(
      :name => _("General averages"),
      :header => [_("var"), _("mean"), _("se"), _("p.5"), _("p.95")]
    )
    @fields.each do |field|
      values = Daru::Vector.new(@samples_ga[field])
      averages.row([
        @ds[field].name,
        format("%0.3f", values.mean),
        format("%0.3f", values.sd),
        format("%0.3f", values.percentil(5)),
        format("%0.3f", values.percentil(95))
      ])
    end
    generator.parse_element(averages)
  end
end
|
218
|
+
# Builds one report row for +pair+.
#
# * pair: two field names, looked up in @ds for their display names
# * std: object giving +proportions+, +mean+ and +sd+ of the resampled values
# * ttd: pairwise value used both as the "sD" cell and as the key of the
#   last (reproducibility) cell
#
# Missing proportions for 0, 0.5 and 1 are filled with 0 in place.
def summary_pairs(pair,std,ttd)
  proportions = std.proportions
  [0, 0.5, 1].each do |outcome|
    next unless proportions[outcome].nil?

    proportions[outcome] = 0
  end

  first_name = @ds[pair[0]].name
  second_name = @ds[pair[1]].name
  label = format("%s - %s", first_name, second_name)

  [
    label,
    f(ttd, 1),
    f(std.mean, 4),
    f(std.sd),
    f(proportions[1]),
    f(proportions[0]),
    f(proportions[0.5]),
    f(proportions[ttd])
  ]
end
|
226
|
+
# Formats +v+ as a fixed-point string with +n+ decimals (3 by default).
def f(v,n=3)
  format("%0.#{n}f", v)
end
|
230
|
+
end
|
231
|
+
end
|
232
|
+
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
require 'statsample/factor/rotation'
|
2
|
+
require 'statsample/factor/pca'
|
3
|
+
require 'statsample/factor/principalaxis'
|
4
|
+
require 'statsample/factor/parallelanalysis'
|
5
|
+
require 'statsample/factor/map'
|
6
|
+
|
7
|
+
module Statsample
|
8
|
+
# Factor Analysis toolbox.
|
9
|
+
# * Classes for Extraction of factors:
|
10
|
+
# * Statsample::Factor::PCA
|
11
|
+
# * Statsample::Factor::PrincipalAxis
|
12
|
+
# * Classes for Rotation of factors:
|
13
|
+
# * Statsample::Factor::Varimax
|
14
|
+
# * Statsample::Factor::Equimax
|
15
|
+
# * Statsample::Factor::Quartimax
|
16
|
+
# * Classes for determining the number of components
|
17
|
+
# * Statsample::Factor::MAP
|
18
|
+
# * Statsample::Factor::ParallelAnalysis
|
19
|
+
#
|
20
|
+
# About number of components, O'Connor(2000) said:
|
21
|
+
# The two procedures [PA and MAP ] complement each other nicely,
|
22
|
+
# in that the MAP tends to err (when it does err) in the direction
|
23
|
+
# of underextraction, whereas parallel analysis tends to err
|
24
|
+
# (when it does err) in the direction of overextraction.
|
25
|
+
# Optimal decisions are thus likely to be made after considering
|
26
|
+
# the results of both analytic procedures. (p.10)
|
27
|
+
|
28
|
+
module Factor
|
29
|
+
# Anti-image covariance matrix.
|
30
|
+
# Useful for inspecting the desirability of data for factor analysis.
|
31
|
+
# According to Dziuban & Shirkey (1974, p.359):
|
32
|
+
# "If this matrix does not exhibit many zero off-diagonal elements,
|
33
|
+
# the investigator has evidence that the correlation
|
34
|
+
# matrix is not appropriate for factor analysis."
|
35
|
+
#
|
36
|
+
# Returns s2 * inverse(matrix) * s2, where s2 is the inverse of the diagonal
# matrix built from the diagonal of inverse(matrix).
#
# The result is extended with Statsample::CovariateMatrix and receives the
# field names of +matrix+ when it exposes +fields+.
def self.anti_image_covariance_matrix(matrix)
  # Invert once and reuse: inversion is the costly step.
  inverse = matrix.inverse
  s2 = Matrix.diagonal(*inverse.diagonal).inverse
  aicm = s2 * inverse * s2
  aicm.extend(Statsample::CovariateMatrix)
  aicm.fields = matrix.fields if matrix.respond_to?(:fields)
  aicm
end
|
43
|
+
# Returns s * inverse(matrix) * s, where s is the inverse of the +sqrt+ of
# the diagonal matrix built from the diagonal of inverse(matrix).
#
# +matrix+ is first converted with +to_matrix+. The result is extended with
# Statsample::CovariateMatrix and receives the field names of the converted
# matrix when it exposes +fields+.
def self.anti_image_correlation_matrix(matrix)
  matrix = matrix.to_matrix
  # Invert once and reuse: inversion is the costly step.
  inverse = matrix.inverse
  s = Matrix.diagonal(*inverse.diagonal).sqrt.inverse
  aicm = s * inverse * s

  aicm.extend(Statsample::CovariateMatrix)
  aicm.fields = matrix.fields if matrix.respond_to?(:fields)
  aicm
end
|
52
|
+
|
53
|
+
# Kaiser-Meyer-Olkin measure of sampling adequacy for correlation matrix.
|
54
|
+
#
|
55
|
+
# Kaiser's (1974, cited on Dziuban & Shirkey, 1974) present calibration of the index is as follows :
|
56
|
+
# * .90s—marvelous
|
57
|
+
# * .80s— meritorious
|
58
|
+
# * .70s—middling
|
59
|
+
# * .60s—mediocre
|
60
|
+
# * .50s—miserable
|
61
|
+
# * below .50—unacceptable
|
62
|
+
# Ratio sum_r / (sum_r + sum_q), where sum_r adds the squared off-diagonal
# cells of +matrix+ and sum_q adds the squared off-diagonal cells of its
# anti-image correlation matrix.
def self.kmo(matrix)
  anti_image = anti_image_correlation_matrix(matrix)
  size = matrix.row_size
  correlation_ss = 0
  anti_image_ss = 0

  (0...size).each do |row|
    (0...size).each do |column|
      next if row == column # the diagonal is excluded from both sums

      correlation_ss += matrix[row, column]**2
      anti_image_ss += anti_image[row, column]**2
    end
  end

  correlation_ss.quo(correlation_ss + anti_image_ss)
end
|
76
|
+
# Kaiser-Meyer-Olkin measure of sampling adequacy for one variable.
|
77
|
+
#
|
78
|
+
# Same ratio as +kmo+, restricted to the row of a single variable.
#
# * matrix: correlation matrix; must respond to +fields+ when +var+ is a name
# * var: row index of the variable, or its field name as a String or Symbol
#
# Raises a RuntimeError when a name is given and the matrix has no +fields+
# or does not contain that name.
def self.kmo_univariate(matrix, var)
  j =
    if var.is_a?(String) || var.is_a?(Symbol)
      raise "Matrix doesn't respond to fields" unless matrix.respond_to?(:fields)

      matrix.fields.index(var) || raise("Matrix doesn't have field #{var}")
    else
      var
    end

  q = anti_image_correlation_matrix(matrix)
  n = matrix.row_size

  sum_r, sum_q = 0, 0

  n.times do |k|
    next if j == k # skip the variable's own diagonal cell

    sum_r += matrix[j, k]**2
    sum_q += q[j, k]**2
  end
  sum_r.quo(sum_r + sum_q)
end
|
103
|
+
end
|
104
|
+
end
|
@@ -0,0 +1,124 @@
|
|
1
|
+
module Statsample
|
2
|
+
module Factor
|
3
|
+
# = Velicer's Minimum Average Partial
|
4
|
+
#
|
5
|
+
# "Velicer’s (1976) MAP test involves a complete princi-
|
6
|
+
# pal components analysis followed by the examination of
|
7
|
+
# a series of matrices of partial correlations. Specifically,
|
8
|
+
# on the first step, the first principal component is par-
|
9
|
+
# tialed out of the correlations between the variables of in-
|
10
|
+
# terest, and the average squared coefficient in the off-
|
11
|
+
# diagonals of the resulting partial correlation matrix is
|
12
|
+
# computed. On the second step, the first two principal
|
13
|
+
# components are partialed out of the original correlation
|
14
|
+
# matrix and the average squared partial correlation is
|
15
|
+
# again computed. These computations are conducted for k
|
16
|
+
# (the number of variables) minus one steps. The average
|
17
|
+
# squared partial correlations from these steps are then
|
18
|
+
# lined up, and the number of components is determined by
|
19
|
+
# the step number in the analyses that resulted in the lowest
|
20
|
+
# average squared partial correlation. The average squared
|
21
|
+
# coefficient in the original correlation matrix is also com-
|
22
|
+
# puted, and if this coefficient happens to be lower than
|
23
|
+
# the lowest average squared partial correlation, then no
|
24
|
+
# components should be extracted from the correlation ma-
|
25
|
+
# trix. Statistically, components are retained as long as the
|
26
|
+
# variance in the correlation matrix represents systematic
|
27
|
+
# variance. Components are no longer retained when there
|
28
|
+
# is proportionately more unsystematic variance than sys-
|
29
|
+
# tematic variance." (O'Connor, 2000, p.397).
|
30
|
+
#
|
31
|
+
# Current algorithm is loosely based on SPSS O'Connor algorithm
|
32
|
+
#
|
33
|
+
# == Reference
|
34
|
+
# * O'Connor, B. (2000). SPSS and SAS programs for determining the number of components using parallel analysis and Velicer's MAP test. Behavior Research Methods, Instruments, & Computers, 32(3), 396-402.
|
35
|
+
#
|
36
|
+
|
37
|
+
|
38
|
+
|
39
|
+
class MAP
|
40
|
+
include Summarizable
|
41
|
+
include DirtyMemoize
|
42
|
+
# Name of analysis
|
43
|
+
attr_accessor :name
|
44
|
+
attr_reader :eigenvalues
|
45
|
+
# Number of factors to retain
|
46
|
+
attr_reader :number_of_factors
|
47
|
+
# Average squared correlations
|
48
|
+
attr_reader :fm
|
49
|
+
# Smallest average squared correlation
|
50
|
+
attr_reader :minfm
|
51
|
+
|
52
|
+
attr_accessor :use_gsl
|
53
|
+
# Builds an instance from the correlation matrix of +ds+, forwarding +opts+.
def self.with_dataset(ds,opts=Hash.new)
  correlations = ds.correlation_matrix
  new(correlations, opts)
end
|
56
|
+
# Stores +matrix+ and applies the options.
#
# Defaults are :use_gsl => true and a translated :name; values given in
# +opts+ win. Only the default keys are pushed through their writers, but
# the full merged hash is kept in @opts.
def initialize(matrix, opts=Hash.new)
  @matrix = matrix
  defaults = {
    :use_gsl => true,
    :name => _("Velicer's MAP")
  }
  @opts = defaults.merge(opts)
  defaults.each_key do |option|
    send("#{option}=", @opts[option])
  end
end
|
65
|
+
# Runs the MAP procedure and stores @eigenvalues, @fm, @minfm,
# @number_of_factors and @errors.
#
# fm[0] is computed from the working matrix itself; fm[m + 1] is computed
# after removing a * a' for the first m + 1 loading columns and rescaling
# the remainder by the inverse square roots of its diagonal. The number of
# factors is the index of the smallest non-Complex entry of fm (the first
# one on ties). Indexes of Complex entries are collected in @errors.
#
# Relies on +eigenvectors_matrix+, +eigenvalues+, +mssq+ and +diagonal+
# being available on the working matrix.
def compute
  # GSL is used only when it was requested AND is available. The same flag
  # picks the sub-matrix call below, so a plain Matrix is always sliced with
  # +minor+ instead of GSL-style range indexing.
  gsl_active = use_gsl && Statsample.has_gsl?
  work_m = gsl_active ? @matrix.to_gsl : @matrix
  klass_m = work_m.class

  eigvect, @eigenvalues = work_m.eigenvectors_matrix, work_m.eigenvalues
  eigenvalues_sqrt = @eigenvalues.collect { |v| Math.sqrt(v) }
  loadings = eigvect * klass_m.diagonal(*eigenvalues_sqrt)

  ncol = @matrix.column_size
  denominator = ncol * (ncol - 1)
  last_row = loadings.row_size - 1

  fm = Array.new(@matrix.row_size)
  fm[0] = (work_m.mssq - ncol).quo(denominator)

  (ncol - 1).times do |m|
    puts "MAP:Eigenvalue #{m+1}" if $DEBUG
    a = if gsl_active
          loadings[0..last_row, 0..m]
        else
          loadings.minor(0..last_row, 0..m)
        end
    partcov = work_m - (a * a.transpose)

    d = klass_m.diagonal(*(partcov.diagonal.collect { |v| Math.sqrt(1/v) }))
    pr = d * partcov * d
    fm[m + 1] = (pr.mssq - ncol).quo(denominator)
  end

  minfm = fm[0]
  nfactors = 0
  @errors = []
  fm.each_with_index do |v, s|
    if v.is_a?(::Complex)
      # Complex values cannot be ordered; remember the step and skip it.
      @errors.push(s)
    elsif v < minfm
      minfm = v
      nfactors = s
    end
  end

  @number_of_factors = nfactors
  @fm = fm
  @minfm = minfm
end
|
104
|
+
# Writes the MAP report into +g+: a table of eigenvalues, a table of the
# average squared correlations per number of components, and two summary
# lines. Entries whose index is listed in @errors are printed as "*".
def report_building(g) #:nodoc:
  g.section(:name=>@name) do |s|
    s.table(:name=>_("Eigenvalues"),:header=>[_("Value")]) do |t|
      eigenvalues.each_with_index do |e,i|
        # NOTE(review): @errors is filled with indexes of +fm+ in #compute but
        # is also used here to star eigenvalues -- confirm this is intended.
        t.row([@errors.include?(i) ? "*" : "%0.6f" % e])
      end
    end
    s.table(:name=>_("Velicer's Average Squared Correlations"), :header=>[_("number of components"),_("average square correlation")]) do |t|
      fm.each_with_index do |v,i|
        t.row(["%d" % i, @errors.include?(i) ? "*" : "%0.6f" % v])
      end
    end
    # Translate the template first, then fill it in; formatting before the
    # lookup would hand gettext a string that matches no msgid.
    s.text(_("The smallest average squared correlation is : %0.6f") % minfm)
    s.text(_("The number of components is : %d") % number_of_factors)
  end
end
|
120
|
+
dirty_memoize :number_of_factors, :fm, :minfm, :eigenvalues
|
121
|
+
|
122
|
+
end
|
123
|
+
end
|
124
|
+
end
|