statsample-ekatena 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.travis.yml +23 -0
- data/CONTRIBUTING.md +17 -0
- data/Gemfile +2 -0
- data/History.txt +457 -0
- data/LICENSE.txt +12 -0
- data/README.md +175 -0
- data/Rakefile +44 -0
- data/benchmarks/correlation_matrix_15_variables.rb +32 -0
- data/benchmarks/correlation_matrix_5_variables.rb +33 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +71 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
- data/benchmarks/correlation_matrix_methods/results.ds +0 -0
- data/benchmarks/factor_map.rb +37 -0
- data/benchmarks/helpers_benchmark.rb +5 -0
- data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
- data/doc_latex/manual/equations.tex +78 -0
- data/examples/boxplot.rb +28 -0
- data/examples/chisquare_test.rb +23 -0
- data/examples/correlation_matrix.rb +32 -0
- data/examples/dataset.rb +30 -0
- data/examples/dominance_analysis.rb +33 -0
- data/examples/dominance_analysis_bootstrap.rb +32 -0
- data/examples/histogram.rb +26 -0
- data/examples/icc.rb +24 -0
- data/examples/levene.rb +29 -0
- data/examples/multiple_regression.rb +20 -0
- data/examples/multivariate_correlation.rb +33 -0
- data/examples/parallel_analysis.rb +40 -0
- data/examples/polychoric.rb +40 -0
- data/examples/principal_axis.rb +26 -0
- data/examples/reliability.rb +31 -0
- data/examples/scatterplot.rb +25 -0
- data/examples/t_test.rb +27 -0
- data/examples/tetrachoric.rb +17 -0
- data/examples/u_test.rb +24 -0
- data/examples/vector.rb +20 -0
- data/examples/velicer_map_test.rb +46 -0
- data/grab_references.rb +29 -0
- data/lib/spss.rb +134 -0
- data/lib/statsample-ekatena/analysis.rb +100 -0
- data/lib/statsample-ekatena/analysis/suite.rb +89 -0
- data/lib/statsample-ekatena/analysis/suitereportbuilder.rb +44 -0
- data/lib/statsample-ekatena/anova.rb +24 -0
- data/lib/statsample-ekatena/anova/contrast.rb +79 -0
- data/lib/statsample-ekatena/anova/oneway.rb +187 -0
- data/lib/statsample-ekatena/anova/twoway.rb +207 -0
- data/lib/statsample-ekatena/bivariate.rb +406 -0
- data/lib/statsample-ekatena/bivariate/pearson.rb +54 -0
- data/lib/statsample-ekatena/codification.rb +182 -0
- data/lib/statsample-ekatena/converter/csv.rb +28 -0
- data/lib/statsample-ekatena/converter/spss.rb +48 -0
- data/lib/statsample-ekatena/converters.rb +211 -0
- data/lib/statsample-ekatena/crosstab.rb +188 -0
- data/lib/statsample-ekatena/daru.rb +115 -0
- data/lib/statsample-ekatena/dataset.rb +10 -0
- data/lib/statsample-ekatena/dominanceanalysis.rb +425 -0
- data/lib/statsample-ekatena/dominanceanalysis/bootstrap.rb +232 -0
- data/lib/statsample-ekatena/factor.rb +104 -0
- data/lib/statsample-ekatena/factor/map.rb +124 -0
- data/lib/statsample-ekatena/factor/parallelanalysis.rb +166 -0
- data/lib/statsample-ekatena/factor/pca.rb +242 -0
- data/lib/statsample-ekatena/factor/principalaxis.rb +243 -0
- data/lib/statsample-ekatena/factor/rotation.rb +198 -0
- data/lib/statsample-ekatena/formula/fit_model.rb +46 -0
- data/lib/statsample-ekatena/formula/formula.rb +306 -0
- data/lib/statsample-ekatena/graph.rb +11 -0
- data/lib/statsample-ekatena/graph/boxplot.rb +236 -0
- data/lib/statsample-ekatena/graph/histogram.rb +198 -0
- data/lib/statsample-ekatena/graph/scatterplot.rb +213 -0
- data/lib/statsample-ekatena/histogram.rb +180 -0
- data/lib/statsample-ekatena/matrix.rb +329 -0
- data/lib/statsample-ekatena/multiset.rb +310 -0
- data/lib/statsample-ekatena/regression.rb +65 -0
- data/lib/statsample-ekatena/regression/multiple.rb +89 -0
- data/lib/statsample-ekatena/regression/multiple/alglibengine.rb +128 -0
- data/lib/statsample-ekatena/regression/multiple/baseengine.rb +251 -0
- data/lib/statsample-ekatena/regression/multiple/gslengine.rb +129 -0
- data/lib/statsample-ekatena/regression/multiple/matrixengine.rb +205 -0
- data/lib/statsample-ekatena/regression/multiple/rubyengine.rb +86 -0
- data/lib/statsample-ekatena/regression/simple.rb +121 -0
- data/lib/statsample-ekatena/reliability.rb +150 -0
- data/lib/statsample-ekatena/reliability/icc.rb +415 -0
- data/lib/statsample-ekatena/reliability/multiscaleanalysis.rb +181 -0
- data/lib/statsample-ekatena/reliability/scaleanalysis.rb +233 -0
- data/lib/statsample-ekatena/reliability/skillscaleanalysis.rb +114 -0
- data/lib/statsample-ekatena/resample.rb +15 -0
- data/lib/statsample-ekatena/shorthand.rb +125 -0
- data/lib/statsample-ekatena/srs.rb +169 -0
- data/lib/statsample-ekatena/test.rb +82 -0
- data/lib/statsample-ekatena/test/bartlettsphericity.rb +45 -0
- data/lib/statsample-ekatena/test/chisquare.rb +73 -0
- data/lib/statsample-ekatena/test/f.rb +52 -0
- data/lib/statsample-ekatena/test/kolmogorovsmirnov.rb +63 -0
- data/lib/statsample-ekatena/test/levene.rb +88 -0
- data/lib/statsample-ekatena/test/t.rb +309 -0
- data/lib/statsample-ekatena/test/umannwhitney.rb +208 -0
- data/lib/statsample-ekatena/test/wilcoxonsignedrank.rb +90 -0
- data/lib/statsample-ekatena/vector.rb +19 -0
- data/lib/statsample-ekatena/version.rb +3 -0
- data/lib/statsample.rb +282 -0
- data/po/es/statsample.mo +0 -0
- data/po/es/statsample.po +959 -0
- data/po/statsample.pot +947 -0
- data/references.txt +24 -0
- data/statsample-ekatena.gemspec +49 -0
- data/test/fixtures/bank2.dat +200 -0
- data/test/fixtures/correlation_matrix.rb +17 -0
- data/test/fixtures/df.csv +15 -0
- data/test/fixtures/hartman_23.matrix +9 -0
- data/test/fixtures/stock_data.csv +500 -0
- data/test/fixtures/tetmat_matrix.txt +5 -0
- data/test/fixtures/tetmat_test.txt +1001 -0
- data/test/helpers_tests.rb +83 -0
- data/test/test_analysis.rb +176 -0
- data/test/test_anova_contrast.rb +36 -0
- data/test/test_anovaoneway.rb +26 -0
- data/test/test_anovatwoway.rb +37 -0
- data/test/test_anovatwowaywithdataset.rb +47 -0
- data/test/test_anovawithvectors.rb +102 -0
- data/test/test_awesome_print_bug.rb +16 -0
- data/test/test_bartlettsphericity.rb +25 -0
- data/test/test_bivariate.rb +164 -0
- data/test/test_codification.rb +78 -0
- data/test/test_crosstab.rb +67 -0
- data/test/test_dominance_analysis.rb +39 -0
- data/test/test_factor.rb +228 -0
- data/test/test_factor_map.rb +38 -0
- data/test/test_factor_pa.rb +56 -0
- data/test/test_fit_model.rb +88 -0
- data/test/test_ggobi.rb +35 -0
- data/test/test_gsl.rb +15 -0
- data/test/test_histogram.rb +109 -0
- data/test/test_matrix.rb +48 -0
- data/test/test_multiset.rb +176 -0
- data/test/test_regression.rb +231 -0
- data/test/test_reliability.rb +223 -0
- data/test/test_reliability_icc.rb +198 -0
- data/test/test_reliability_skillscale.rb +57 -0
- data/test/test_resample.rb +24 -0
- data/test/test_srs.rb +9 -0
- data/test/test_statistics.rb +69 -0
- data/test/test_stest.rb +69 -0
- data/test/test_stratified.rb +17 -0
- data/test/test_test_f.rb +33 -0
- data/test/test_test_kolmogorovsmirnov.rb +34 -0
- data/test/test_test_t.rb +62 -0
- data/test/test_umannwhitney.rb +27 -0
- data/test/test_vector.rb +12 -0
- data/test/test_wilcoxonsignedrank.rb +64 -0
- metadata +570 -0
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
module Statsample
|
|
2
|
+
class DominanceAnalysis
|
|
3
|
+
# == Goal
|
|
4
|
+
# Generates bootstrap samples to identify the replicability of a Dominance Analysis. See Azen & Budescu (2003) for more information.
|
|
5
|
+
#
|
|
6
|
+
# == Usage
|
|
7
|
+
#
|
|
8
|
+
# require 'statsample'
|
|
9
|
+
# a = Daru::Vector.new(100.times.collect {rand})
|
|
10
|
+
# b = Daru::Vector.new(100.times.collect {rand})
|
|
11
|
+
# c = Daru::Vector.new(100.times.collect {rand})
|
|
12
|
+
# d = Daru::Vector.new(100.times.collect {rand})
|
|
13
|
+
# ds = Daru::DataFrame.new({:a => a,:b => b,:c => c,:d => d})
|
|
14
|
+
# ds[:y] = ds.collect_rows { |row| row[:a]*5+row[:b]*2+row[:c]*2+row[:d]*2+10*rand() }
|
|
15
|
+
# dab=Statsample::DominanceAnalysis::Bootstrap.new(ds, :y, :debug=>true)
|
|
16
|
+
# dab.bootstrap(100,nil)
|
|
17
|
+
# puts dab.summary
|
|
18
|
+
# <strong>Output</strong>
|
|
19
|
+
# Sample size: 100
|
|
20
|
+
# t: 1.98421693632958
|
|
21
|
+
#
|
|
22
|
+
# Linear Regression Engine: Statsample::Regression::Multiple::MatrixEngine
|
|
23
|
+
# Table: Bootstrap report
|
|
24
|
+
# --------------------------------------------------------------------------------------------
|
|
25
|
+
# | pairs | sD | Dij | SE(Dij) | Pij | Pji | Pno | Reproducibility |
|
|
26
|
+
# --------------------------------------------------------------------------------------------
|
|
27
|
+
# | Complete dominance |
|
|
28
|
+
# --------------------------------------------------------------------------------------------
|
|
29
|
+
# | a - b | 1.0 | 0.6150 | 0.454 | 0.550 | 0.320 | 0.130 | 0.550 |
|
|
30
|
+
# | a - c | 1.0 | 0.9550 | 0.175 | 0.930 | 0.020 | 0.050 | 0.930 |
|
|
31
|
+
# | a - d | 1.0 | 0.9750 | 0.131 | 0.960 | 0.010 | 0.030 | 0.960 |
|
|
32
|
+
# | b - c | 1.0 | 0.8800 | 0.276 | 0.820 | 0.060 | 0.120 | 0.820 |
|
|
33
|
+
# | b - d | 1.0 | 0.9250 | 0.193 | 0.860 | 0.010 | 0.130 | 0.860 |
|
|
34
|
+
# | c - d | 0.5 | 0.5950 | 0.346 | 0.350 | 0.160 | 0.490 | 0.490 |
|
|
35
|
+
# --------------------------------------------------------------------------------------------
|
|
36
|
+
# | Conditional dominance |
|
|
37
|
+
# --------------------------------------------------------------------------------------------
|
|
38
|
+
# | a - b | 1.0 | 0.6300 | 0.458 | 0.580 | 0.320 | 0.100 | 0.580 |
|
|
39
|
+
# | a - c | 1.0 | 0.9700 | 0.156 | 0.960 | 0.020 | 0.020 | 0.960 |
|
|
40
|
+
# | a - d | 1.0 | 0.9800 | 0.121 | 0.970 | 0.010 | 0.020 | 0.970 |
|
|
41
|
+
# | b - c | 1.0 | 0.8850 | 0.283 | 0.840 | 0.070 | 0.090 | 0.840 |
|
|
42
|
+
# | b - d | 1.0 | 0.9500 | 0.181 | 0.920 | 0.020 | 0.060 | 0.920 |
|
|
43
|
+
# | c - d | 0.5 | 0.5800 | 0.360 | 0.350 | 0.190 | 0.460 | 0.460 |
|
|
44
|
+
# --------------------------------------------------------------------------------------------
|
|
45
|
+
# | General Dominance |
|
|
46
|
+
# --------------------------------------------------------------------------------------------
|
|
47
|
+
# | a - b | 1.0 | 0.6500 | 0.479 | 0.650 | 0.350 | 0.000 | 0.650 |
|
|
48
|
+
# | a - c | 1.0 | 0.9800 | 0.141 | 0.980 | 0.020 | 0.000 | 0.980 |
|
|
49
|
+
# | a - d | 1.0 | 0.9900 | 0.100 | 0.990 | 0.010 | 0.000 | 0.990 |
|
|
50
|
+
# | b - c | 1.0 | 0.9000 | 0.302 | 0.900 | 0.100 | 0.000 | 0.900 |
|
|
51
|
+
# | b - d | 1.0 | 0.9700 | 0.171 | 0.970 | 0.030 | 0.000 | 0.970 |
|
|
52
|
+
# | c - d | 1.0 | 0.5600 | 0.499 | 0.560 | 0.440 | 0.000 | 0.560 |
|
|
53
|
+
# --------------------------------------------------------------------------------------------
|
|
54
|
+
#
|
|
55
|
+
# Table: General averages
|
|
56
|
+
# ---------------------------------------
|
|
57
|
+
# | var | mean | se | p.5 | p.95 |
|
|
58
|
+
# ---------------------------------------
|
|
59
|
+
# | a | 0.133 | 0.049 | 0.062 | 0.218 |
|
|
60
|
+
# | b | 0.106 | 0.048 | 0.029 | 0.199 |
|
|
61
|
+
# | c | 0.035 | 0.032 | 0.002 | 0.106 |
|
|
62
|
+
# | d | 0.023 | 0.019 | 0.002 | 0.062 |
|
|
63
|
+
# ---------------------------------------
|
|
64
|
+
#
|
|
65
|
+
# == References:
|
|
66
|
+
# * Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. <em>Psychological Methods, 8</em>(2), 129-148.
|
|
67
|
+
class Bootstrap
  include Writable
  include Summarizable
  # Total Dominance results
  attr_reader :samples_td
  # Conditional Dominance results
  attr_reader :samples_cd
  # General Dominance results
  attr_reader :samples_gd
  # General average results
  attr_reader :samples_ga
  # Name of fields
  attr_reader :fields
  # Regression class used for analysis
  attr_accessor :regression_class
  # Dataset
  attr_accessor :ds
  # Name of analysis
  attr_accessor :name
  # Alpha level of confidence. Default: ALPHA
  attr_accessor :alpha
  # Debug?
  attr_accessor :debug
  # Default level of confidence for t calculation
  ALPHA=0.95

  # Create a new Dominance Analysis Bootstrap Object
  #
  # * ds: A Daru::DataFrame object
  # * y_var: Name of dependent variable (or an Array of names, which
  #   selects the MultipleDependent regression class)
  # * opts: Any other writable attribute of the class. Keys without a
  #   matching setter are ignored.
  def initialize(ds,y_var, opts=Hash.new)
    @ds = ds
    @y_var = y_var.respond_to?(:to_sym) ? y_var.to_sym : y_var
    @n = ds.nrows

    @n_samples=0
    @alpha=ALPHA
    @debug=false
    if y_var.is_a? Array
      @fields=ds.vectors.to_a - y_var
      @regression_class=Regression::Multiple::MultipleDependent
    else
      # Remove the dependent variable under both the name given by the
      # caller and its symbolized form, so a String name also matches
      # Symbol vector names.
      @fields=ds.vectors.to_a - [y_var, @y_var]
      @regression_class=Regression::Multiple::MatrixEngine
    end
    @samples_ga=@fields.each_with_object({}) { |field, acc| acc[field]=[] }

    @name=_("Bootstrap dominance Analysis: %s over %s") % [ ds.vectors.to_a.join(",") , @y_var]
    opts.each do |key, value|
      # Check for the setter itself: read-only attributes answer to the
      # getter but have no writer.
      setter="#{key}="
      send(setter, value) if respond_to?(setter)
    end
    create_samples_pairs
  end

  # lr_class deprecated
  alias_method :lr_class, :regression_class

  # Dominance analysis over the original (non-resampled) dataset,
  # built on first use and cached.
  def da
    if @da.nil?
      @da=DominanceAnalysis.new(@ds,@y_var, :regression_class => @regression_class)
    end
    @da
  end

  # Creates n re-samples from original dataset and store result of
  # each sample on @samples_td, @samples_cd, @samples_gd, @samples_ga
  #
  # * number_samples: Number of new samples to add
  # * n: size of each new sample. If nil, equal to original sample size
  def bootstrap(number_samples,n=nil)
    number_samples.times do |t|
      @n_samples+=1
      puts _("Bootstrap %d of %d") % [t+1, number_samples] if @debug
      ds_boot=@ds.bootstrap(n)
      da_1=DominanceAnalysis.new(ds_boot, @y_var, :regression_class => @regression_class)

      da_1.total_dominance.each { |k,v| @samples_td[k].push(v) }
      da_1.conditional_dominance.each { |k,v| @samples_cd[k].push(v) }
      da_1.general_dominance.each { |k,v| @samples_gd[k].push(v) }
      da_1.general_averages.each { |k,v| @samples_ga[k].push(v) }
    end
  end

  # Resets the per-pair sample stores and builds @pairs, the list of
  # every two-field combination of @fields (in field order).
  def create_samples_pairs
    @samples_td={}
    @samples_cd={}
    @samples_gd={}
    @pairs=[]
    @fields.combination(2) do |pair|
      @pairs.push(pair)
      [@samples_td, @samples_cd, @samples_gd].each { |store| store[pair]=[] }
    end
  end

  # Value obtained from Distribution::T.p_value for the two-sided
  # level given by @alpha, with (@n_samples - 1) degrees of freedom.
  def t
    Distribution::T.p_value(1-((1-@alpha) / 2), @n_samples - 1)
  end

  def report_building(builder) # :nodoc:
    raise "You should bootstrap first" if @n_samples==0
    builder.section(:name=>@name) do |generator|
      generator.text _("Sample size: %d\n") % @n_samples
      generator.text "t: #{t}\n"
      generator.text _("Linear Regression Engine: %s") % @regression_class.name

      table=ReportBuilder::Table.new(:name=>"Bootstrap report", :header => [_("pairs"), "sD","Dij", _("SE(Dij)"), "Pij", "Pji", "Pno", _("Reproducibility")])
      # One entry per dominance type: section title, bootstrap samples
      # and the pairwise method queried on the original-sample analysis.
      sections=[
        [_("Complete dominance"), @samples_td, :total_dominance_pairwise],
        [_("Conditional dominance"), @samples_cd, :conditional_dominance_pairwise],
        [_("General Dominance"), @samples_gd, :general_dominance_pairwise]
      ]
      sections.each_with_index do |(title, samples, pairwise), idx|
        table.hr unless idx.zero?
        table.row([title,"","","","","","",""])
        table.hr
        @pairs.each do |pair|
          std=Daru::Vector.new(samples[pair])
          ttd=da.public_send(pairwise, pair[0], pair[1])
          table.row(summary_pairs(pair,std,ttd))
        end
      end
      generator.parse_element(table)

      table=ReportBuilder::Table.new(:name=>_("General averages"), :header=>[_("var"), _("mean"), _("se"), _("p.5"), _("p.95")])

      @fields.each do |field|
        v=Daru::Vector.new(@samples_ga[field])
        row=[@ds[field].name, sprintf("%0.3f",v.mean), sprintf("%0.3f",v.sd), sprintf("%0.3f",v.percentil(5)),sprintf("%0.3f",v.percentil(95))]
        table.row(row)
      end

      generator.parse_element(table)
    end
  end

  # Builds one report row for a pair of fields.
  #
  # * pair: two field names
  # * std: vector with the bootstrap dominance values for the pair
  # * ttd: dominance value of the pair on the original sample
  #
  # Columns: name, sD, Dij, SE(Dij), Pij, Pji, Pno, Reproducibility.
  def summary_pairs(pair,std,ttd)
    freqs=std.proportions
    # Outcomes that never appeared in the samples count as proportion 0.
    [0, 0.5, 1].each do |outcome|
      freqs[outcome]=0 if freqs[outcome].nil?
    end
    name="%s - %s" % [@ds[pair[0]].name, @ds[pair[1]].name]
    [name,f(ttd,1),f(std.mean,4),f(std.sd),f(freqs[1]), f(freqs[0]), f(freqs[0.5]), f(freqs[ttd])]
  end

  # Formats +v+ as a fixed-point string with +n+ decimals (default 3).
  def f(v,n=3)
    sprintf("%0.#{n}f", v)
  end
end
|
|
231
|
+
end
|
|
232
|
+
end
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
require 'statsample/factor/rotation'
|
|
2
|
+
require 'statsample/factor/pca'
|
|
3
|
+
require 'statsample/factor/principalaxis'
|
|
4
|
+
require 'statsample/factor/parallelanalysis'
|
|
5
|
+
require 'statsample/factor/map'
|
|
6
|
+
|
|
7
|
+
module Statsample
|
|
8
|
+
# Factor Analysis toolbox.
|
|
9
|
+
# * Classes for Extraction of factors:
|
|
10
|
+
# * Statsample::Factor::PCA
|
|
11
|
+
# * Statsample::Factor::PrincipalAxis
|
|
12
|
+
# * Classes for Rotation of factors:
|
|
13
|
+
# * Statsample::Factor::Varimax
|
|
14
|
+
# * Statsample::Factor::Equimax
|
|
15
|
+
# * Statsample::Factor::Quartimax
|
|
16
|
+
# * Classes for determining the number of components
|
|
17
|
+
# * Statsample::Factor::MAP
|
|
18
|
+
# * Statsample::Factor::ParallelAnalysis
|
|
19
|
+
#
|
|
20
|
+
# About number of components, O'Connor(2000) said:
|
|
21
|
+
# The two procedures [PA and MAP ] complement each other nicely,
|
|
22
|
+
# in that the MAP tends to err (when it does err) in the direction
|
|
23
|
+
# of underextraction, whereas parallel analysis tends to err
|
|
24
|
+
# (when it does err) in the direction of overextraction.
|
|
25
|
+
# Optimal decisions are thus likely to be made after considering
|
|
26
|
+
# the results of both analytic procedures. (p.10)
|
|
27
|
+
|
|
28
|
+
module Factor
  # Anti-image covariance matrix.
  # Useful for inspection of desirability of data for factor analysis.
  # According to Dziuban & Shirkey (1974, p.359):
  #   "If this matrix does not exhibit many zero off-diagonal elements,
  #   the investigator has evidence that the correlation
  #   matrix is not appropriate for factor analysis."
  #
  # Returns a matrix extended with Statsample::CovariateMatrix; field
  # names are copied from +matrix+ when it provides them.
  def self.anti_image_covariance_matrix(matrix)
    inverse=matrix.inverse # computed once, reused below
    s2=Matrix.diagonal(*(inverse.diagonal)).inverse
    aicm=s2*inverse*s2
    aicm.extend(Statsample::CovariateMatrix)
    aicm.fields=matrix.fields if matrix.respond_to? :fields
    aicm
  end

  # Anti-image correlation matrix: the inverse of +matrix+ rescaled on
  # both sides by the inverse square root of its own diagonal.
  #
  # Returns a matrix extended with Statsample::CovariateMatrix; field
  # names are copied when the converted matrix provides them.
  def self.anti_image_correlation_matrix(matrix)
    matrix=matrix.to_matrix
    inverse=matrix.inverse # computed once, reused below
    s=Matrix.diagonal(*(inverse.diagonal)).sqrt.inverse
    aicm=s*inverse*s

    aicm.extend(Statsample::CovariateMatrix)
    aicm.fields=matrix.fields if matrix.respond_to? :fields
    aicm
  end

  # Kaiser-Meyer-Olkin measure of sampling adequacy for correlation matrix.
  #
  # Kaiser's (1974, cited in Dziuban & Shirkey, 1974) calibration of the index is as follows:
  # * .90s — marvelous
  # * .80s — meritorious
  # * .70s — middling
  # * .60s — mediocre
  # * .50s — miserable
  # * below .50 — unacceptable
  def self.kmo(matrix)
    q=anti_image_correlation_matrix(matrix)
    n=matrix.row_size
    sum_r,sum_q=0,0
    n.times do |j|
      n.times do |k|
        next if j==k # only off-diagonal elements enter the sums
        sum_r+=matrix[j,k]**2
        sum_q+=q[j,k]**2
      end
    end
    sum_r.quo(sum_r+sum_q)
  end

  # Kaiser-Meyer-Olkin measure of sampling adequacy for one variable.
  #
  # * matrix: correlation matrix
  # * var: row index of the variable, or its field name (String or
  #   Symbol). A name is looked up in matrix.fields, first as given and
  #   then in its String/Symbol counterpart.
  #
  # Raises RuntimeError when a name is given and the matrix has no
  # fields, or the name is not one of them.
  def self.kmo_univariate(matrix, var)
    if var.is_a?(String) || var.is_a?(Symbol)
      raise "Matrix doesn't respond to fields" unless matrix.respond_to? :fields
      fields=matrix.fields
      counterpart=var.is_a?(String) ? var.to_sym : var.to_s
      j=fields.index(var) || fields.index(counterpart)
      raise "Matrix doesn't have field #{var}" if j.nil?
    else
      j=var
    end

    q=anti_image_correlation_matrix(matrix)
    n=matrix.row_size

    sum_r,sum_q=0,0

    n.times do |k|
      next if j==k # skip the diagonal element of row j
      sum_r+=matrix[j,k]**2
      sum_q+=q[j,k]**2
    end
    sum_r.quo(sum_r+sum_q)
  end
end
|
|
104
|
+
end
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
module Statsample
|
|
2
|
+
module Factor
|
|
3
|
+
# = Velicer's Minimum Average Partial
|
|
4
|
+
#
|
|
5
|
+
# "Velicer’s (1976) MAP test involves a complete princi-
|
|
6
|
+
# pal components analysis followed by the examination of
|
|
7
|
+
# a series of matrices of partial correlations. Specifically,
|
|
8
|
+
# on the first step, the first principal component is par-
|
|
9
|
+
# tialed out of the correlations between the variables of in-
|
|
10
|
+
# terest, and the average squared coefficient in the off-
|
|
11
|
+
# diagonals of the resulting partial correlation matrix is
|
|
12
|
+
# computed. On the second step, the first two principal
|
|
13
|
+
# components are partialed out of the original correlation
|
|
14
|
+
# matrix and the average squared partial correlation is
|
|
15
|
+
# again computed. These computations are conducted for k
|
|
16
|
+
# (the number of variables) minus one steps. The average
|
|
17
|
+
# squared partial correlations from these steps are then
|
|
18
|
+
# lined up, and the number of components is determined by
|
|
19
|
+
# the step number in the analyses that resulted in the lowest
|
|
20
|
+
# average squared partial correlation. The average squared
|
|
21
|
+
# coefficient in the original correlation matrix is also com-
|
|
22
|
+
# puted, and if this coefficient happens to be lower than
|
|
23
|
+
# the lowest average squared partial correlation, then no
|
|
24
|
+
# components should be extracted from the correlation ma-
|
|
25
|
+
# trix. Statistically, components are retained as long as the
|
|
26
|
+
# variance in the correlation matrix represents systematic
|
|
27
|
+
# variance. Components are no longer retained when there
|
|
28
|
+
# is proportionately more unsystematic variance than sys-
|
|
29
|
+
# tematic variance." (O'Connor, 2000, p.397).
|
|
30
|
+
#
|
|
31
|
+
# Current algorithm is loosely based on SPSS O'Connor algorithm
|
|
32
|
+
#
|
|
33
|
+
# == Reference
|
|
34
|
+
# * O'Connor, B. (2000). SPSS and SAS programs for determining the number of components using parallel analysis and Velicer's MAP test. Behavior Research Methods, Instruments, & Computers, 32(3), 396-402.
|
|
35
|
+
#
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class MAP
  include Summarizable
  include DirtyMemoize
  # Name of analysis
  attr_accessor :name
  # Eigenvalues of the analyzed matrix
  attr_reader :eigenvalues
  # Number of factors to retain
  attr_reader :number_of_factors
  # Average squared correlations
  attr_reader :fm
  # Smallest average squared correlation
  attr_reader :minfm
  # Request the GSL backend; it is only used when Statsample.has_gsl?
  # is also true.
  attr_accessor :use_gsl

  # Builds the analysis from the correlation matrix of dataset +ds+.
  def self.with_dataset(ds,opts=Hash.new)
    new(ds.correlation_matrix,opts)
  end

  # * matrix: correlation matrix to analyze
  # * opts: :use_gsl (default true) and :name
  def initialize(matrix, opts=Hash.new)
    @matrix=matrix
    opts_default={
      :use_gsl=>true,
      :name=>_("Velicer's MAP")
    }
    @opts=opts_default.merge(opts)
    opts_default.keys.each { |k| send("#{k}=", @opts[k]) }
  end

  # Computes the eigenvalues, the average squared (partial)
  # correlation for 0..(columns-1) partialed components, the smallest
  # of those averages and the step at which it occurs.
  def compute
    # Decide the backend once, so the matrix conversion and the way the
    # loadings are sliced below always agree.
    with_gsl=use_gsl && Statsample.has_gsl?
    gsl_m=with_gsl ? @matrix.to_gsl : @matrix
    klass_m=gsl_m.class
    eigvect,@eigenvalues=gsl_m.eigenvectors_matrix, gsl_m.eigenvalues
    eigenvalues_sqrt=@eigenvalues.collect { |v| Math.sqrt(v) }
    loadings=eigvect*(klass_m.diagonal(*eigenvalues_sqrt))
    averages=Array.new(@matrix.row_size)
    ncol=@matrix.column_size

    # Step 0: average squared off-diagonal coefficient of the original matrix.
    averages[0]=(gsl_m.mssq - ncol).quo(ncol*(ncol-1))

    (ncol-1).times do |m|
      puts "MAP:Eigenvalue #{m+1}" if $DEBUG
      all_rows=0..(loadings.row_size-1)
      # Loadings of the first m+1 components.
      a=with_gsl ? loadings[all_rows,0..m] : loadings.minor(all_rows,0..m)
      partcov= gsl_m - (a*a.transpose)

      # Rescale the partial covariances by their diagonal.
      d=klass_m.diagonal(*(partcov.diagonal.collect { |v| Math::sqrt(1/v) }))
      pr=d*partcov*d
      averages[m+1]=(pr.mssq-ncol).quo(ncol*(ncol-1))
    end

    smallest=averages[0]
    nfactors=0
    @errors=[]
    averages.each_with_index do |v,step|
      if v.is_a? ::Complex
        # Complex averages cannot be ordered; remember the step so the
        # report can flag it.
        @errors.push(step)
      elsif v < smallest
        smallest=v
        nfactors=step
      end
    end
    @number_of_factors=nfactors
    @fm=averages
    @minfm=smallest
  end

  def report_building(g) #:nodoc:
    g.section(:name=>@name) do |s|
      s.table(:name=>_("Eigenvalues"),:header=>[_("Value")]) do |t|
        eigenvalues.each_with_index do |e,i|
          # NOTE(review): @errors holds step indexes of the averages
          # table; confirm they are meant to flag eigenvalues too.
          t.row([@errors.include?(i) ? "*" : "%0.6f" % e])
        end
      end
      s.table(:name=>_("Velicer's Average Squared Correlations"), :header=>[_("number of components"),_("average square correlation")]) do |t|
        fm.each_with_index do |v,i|
          t.row(["%d" % i, @errors.include?(i) ? "*" : "%0.6f" % v])
        end
      end
      # Translate the template first, then interpolate; formatting before
      # the lookup hands gettext a string that is not a msgid.
      s.text(_("The smallest average squared correlation is : %0.6f") % minfm)
      s.text(_("The number of components is : %d") % number_of_factors)
    end
  end
  dirty_memoize :number_of_factors, :fm, :minfm, :eigenvalues
end
|
|
123
|
+
end
|
|
124
|
+
end
|