statsample 0.6.0 → 0.6.1
data/History.txt CHANGED
@@ -1,3 +1,6 @@
+=== 0.6.1 / 2010-02-08
+* Bug fix on DominanceAnalysis summary for Ruby1.9
+* Some extra documentation
 === 0.6.0 / 2010-02-05
 * New Statsample::Factor module. Include classes for extracting factors (Statsample::Factor::PCA and Statsample::Factor::PrincipalAxis) and rotate component matrix (Statsample::Factor::Rotation subclasses). For now, only orthogonal rotations
 * New Statsample::Dataset.crosstab_with_asignation, Statsample::Dataset.one_to_many
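
The 0.6.0 entry above introduces the Statsample::Factor module, with Statsample::Factor::PCA and Statsample::Factor::PrincipalAxis for extraction and the Statsample::Factor::Rotation subclasses for orthogonal rotation. As a rough, hedged sketch of how the PCA extractor might be driven (the correlation_matrix helper, the PCA constructor options and the summary call are assumptions about the API, not something stated in this changelog):

    require 'statsample'
    # Illustrative data: three random scales gathered into a dataset.
    a  = 500.times.collect { rand }.to_scale
    b  = 500.times.collect { rand }.to_scale
    c  = 500.times.collect { rand }.to_scale
    ds = {'a' => a, 'b' => b, 'c' => c}.to_dataset
    # Assumed helpers: build a correlation matrix, extract one component, print a report.
    cor = Statsample::Bivariate.correlation_matrix(ds)
    pca = Statsample::Factor::PCA.new(cor, :m => 1)
    puts pca.summary
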
data/lib/statsample.rb CHANGED
@@ -27,11 +27,11 @@ module Statsample
 # See http://www.john-uebersax.com/stat/tetra.htm for extensive
 # documentation about tetrachoric correlation.
 #
-# This class uses algorithm
-# vol.26, no.3.
-#
-# You can see FORTRAN code on http://lib.stat.cmu.edu/apstat/116
+# This class uses Brown(1977) algorithm. You can see FORTRAN code on http://lib.stat.cmu.edu/apstat/116
 #
+# == References:
+# * Brown, MB. (1977) Algorithm AS 116: the tetrachoric correlation and its standard error. _Applied Statistics, 26_, 343-351.
+#
 # <b>Usage</b>.
 # With two variables x and y on a crosstab like this:
 #
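
The reworked comment above credits Brown (1977), Algorithm AS 116, for the tetrachoric correlation and its standard error computed from a 2x2 crosstab of x and y. A minimal sketch of that usage pattern follows; the four-cell constructor and the r/se readers are assumptions about Statsample::Bivariate::Tetrachoric rather than something shown in this hunk:

    require 'statsample'
    # Cell counts of the 2x2 crosstab of x and y (illustrative numbers).
    a, b, c, d = 58, 52, 26, 80
    tc = Statsample::Bivariate::Tetrachoric.new(a, b, c, d)  # assumed signature
    puts tc.r   # tetrachoric correlation estimate
    puts tc.se  # its standard error, following Brown (1977)
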
@@ -1,269 +1,321 @@
 require 'statsample/dominanceanalysis/bootstrap'
 module Statsample
-[old lines 3-73 not captured]
+# Dominance Analysis is a procedure based on an examination of the R2 values
+# for all possible subset models, to identify the relevance of one or more
+# predictors in the prediction of criterium.
+#
+# See Budescu(1993) and Azen & Budescu (2003) for more information.
+# Use:
+# a=1000.times.collect {rand}.to_scale
+# b=1000.times.collect {rand}.to_scale
+# c=1000.times.collect {rand}.to_scale
+# ds={'a'=>a,'b'=>b,'c'=>c}.to_dataset
+# ds['y']=ds.collect{|row| row['a']*5+row['b']*3+row['c']*2+rand()}
+# da=Statsample::DominanceAnalysis.new(ds,'y')
+# puts da.summary
+# ==>
+# Resultado del Analisis de Dominancia de a, b, c en y
+#
+# ----------------------------------------------------------------
+# | | r2 | sign | a | b | c |
+# ----------------------------------------------------------------
+# | Modelo 0 | | | 0.637 | 0.260 | 0.115 |
+# ----------------------------------------------------------------
+# | a | 0.637 | 0.000 | -- | 0.239 | 0.109 |
+# | b | 0.260 | 0.000 | 0.617 | -- | 0.103 |
+# | c | 0.115 | 0.000 | 0.632 | 0.249 | -- |
+# ----------------------------------------------------------------
+# | k=1 Promedio | | | 0.624 | 0.244 | 0.106 |
+# ----------------------------------------------------------------
+# | a*b | 0.877 | 0.000 | -- | -- | 0.098 |
+# | a*c | 0.746 | 0.000 | -- | 0.229 | -- |
+# | b*c | 0.363 | 0.000 | 0.612 | -- | -- |
+# ----------------------------------------------------------------
+# | k=2 Promedio | | | 0.612 | 0.229 | 0.098 |
+# ----------------------------------------------------------------
+# | a*b*c | 0.975 | 0.000 | -- | -- | -- |
+# ----------------------------------------------------------------
+# | Promedios generales | | | 0.624 | 0.244 | 0.106 |
+# ----------------------------------------------------------------
+#
+# De a pares
+#
+# ----------------------------
+# | Pares | T | C | G |
+# ----------------------------
+# | a - b | 1.0 | 1.0 | 1.0 |
+# | a - c | 1.0 | 1.0 | 1.0 |
+# | b - c | 1.0 | 1.0 | 1.0 |
+# ----------------------------
+#
+# == References:
+# * Budescu, D. V. (1993). Dominance analysis: a new approach to the problem of relative importance of predictors in multiple regression. _Psychological Bulletin, 114_, 542-551.
+# * Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. _Psychological Methods, 8_(2), 129-148.
+class DominanceAnalysis
+include GetText
+bindtextdomain("statsample")
+# Creates a new DominanceAnalysis object
+# Params:
+# * ds: A Dataset object
+# * y_var: Name of dependent variable
+# * r_class: Class to generate the regressions. Could be any subclass of
+# Statsample::Regression::Multiple::BaseEngine
+#
+def initialize(ds,y_var, r_class = Regression::Multiple::RubyEngine)
+@y_var=y_var
+@dy=ds[@y_var]
+@ds=ds
+@r_class=r_class
+@ds_indep=ds.dup(ds.fields-[y_var])
+@fields=@ds_indep.fields
+create_models
+fill_models
+end
+def fill_models
+@models.each do |m|
+@fields.each do |f|
+next if m.include? f
+base_model=md(m)
+comp_model=md(m+[f])
+base_model.add_contribution(f,comp_model.r2)
 end
-[old lines 75-81 not captured]
+end
+end
+def dominance_for_nil_model(i,j)
+if md([i]).r2>md([j]).r2
+1
+elsif md([i]).r2<md([j]).r2
+0
+else
+0.5
+end
+end
+# Returns 1 if i D k, 0 if j dominates i and 0.5 if undetermined
+def total_dominance_pairwise(i,j)
+dm=dominance_for_nil_model(i,j)
+return 0.5 if dm==0.5
+dominances=[dm]
+@models_data.each do |k,m|
+if !m.contributions[i].nil? and !m.contributions[j].nil?
+if m.contributions[i]>m.contributions[j]
+dominances.push(1)
+elsif m.contributions[i]<m.contributions[j]
+dominances.push(0)
 else
-[old lines 83-84 not captured]
-end
-def pairs
-@models.find_all{|m| m.size==2}
-end
-def total_dominance
-pairs.inject({}){|a,pair|
-a[pair]=total_dominance_pairwise(pair[0], pair[1])
-a
-}
-end
-def conditional_dominance
-pairs.inject({}){|a,pair|
-a[pair]=conditional_dominance_pairwise(pair[0], pair[1])
-a
-}
-end
-def general_dominance
-pairs.inject({}){|a,pair|
-a[pair]=general_dominance_pairwise(pair[0], pair[1])
-a
-}
-end
-
-def md(m)
-@models_data[m.sort]
-end
-# Get all model of size k
-def md_k(k)
-out=[]
-models=@models.each{|m| out.push(md(m)) if m.size==k }
-out
-end
-
-# For a hash with arrays of numbers as values
-# Returns a hash with same keys and
-# value as the mean of values of original hash
-
-def get_averages(averages)
-out={}
-averages.each{|key,val| out[key]=val.to_vector(:scale).mean }
-out
-end
-# Hash with average for each k size
-# model
-def average_k(k)
-return nil if k==@fields.size
-models=md_k(k)
-averages=@fields.inject({}) {|a,v| a[v]=[];a}
-models.each do |m|
-@fields.each do |f|
-averages[f].push(m.contributions[f]) unless m.contributions[f].nil?
-end
+return 0.5
+#dominances.push(0.5)
 end
-get_averages(averages)
 end
-[old lines 140-151 not captured]
+end
+final=dominances.uniq
+final.size>1 ? 0.5 : final[0]
+end
+
+# Returns 1 if i cD k, 0 if j cD i and 0.5 if undetermined
+def conditional_dominance_pairwise(i,j)
+dm=dominance_for_nil_model(i,j)
+return 0.5 if dm==0.5
+dominances=[dm]
+for k in 1...@fields.size
+a=average_k(k)
+if a[i]>a[j]
+dominances.push(1)
+elsif a[i]<a[j]
+dominances.push(0)
+else
+return 0.5
+dominances.push(0.5)
+end
+end
+final=dominances.uniq
+final.size>1 ? 0.5 : final[0]
+end
+# Returns 1 if i gD k, 0 if j gD i and 0.5 if undetermined
+def general_dominance_pairwise(i,j)
+ga=general_averages
+if ga[i]>ga[j]
+1
+elsif ga[i]<ga[j]
+0
+else
+0.5
+end
+end
+def pairs
+@models.find_all{|m| m.size==2}
+end
+def total_dominance
+pairs.inject({}){|a,pair| a[pair]=total_dominance_pairwise(pair[0], pair[1])
+a
+}
+end
+def conditional_dominance
+pairs.inject({}){|a,pair|
+a[pair]=conditional_dominance_pairwise(pair[0], pair[1])
+a
+}
+end
+def general_dominance
+pairs.inject({}){|a,pair|
+a[pair]=general_dominance_pairwise(pair[0], pair[1])
+a
+}
+end
+
+def md(m)
+@models_data[m.sort]
+end
+# Get all model of size k
+def md_k(k)
+out=[]
+models=@models.each{|m| out.push(md(m)) if m.size==k }
+out
+end
+
+# For a hash with arrays of numbers as values
+# Returns a hash with same keys and
+# value as the mean of values of original hash
+
+def get_averages(averages)
+out={}
+averages.each{|key,val| out[key]=val.to_vector(:scale).mean }
+out
+end
+# Hash with average for each k size model.
+def average_k(k)
+return nil if k==@fields.size
+models=md_k(k)
+averages=@fields.inject({}) {|a,v| a[v]=[];a}
+models.each do |m|
+@fields.each do |f|
+averages[f].push(m.contributions[f]) unless m.contributions[f].nil?
 end
-[old lines 153-162 not captured]
-ds_prev=@ds.dup(convert+[@y_var])
-modeldata=ModelData.new(convert,ds_prev, @y_var, @fields, @r_class)
-@models_data[convert.sort]=modeldata
-}
+end
+get_averages(averages)
+end
+def general_averages
+if @general_averages.nil?
+averages=@fields.inject({}) {|a,v| a[v]=[md([v]).r2];a}
+for k in 1...@fields.size
+ak=average_k(k)
+@fields.each do |f|
+averages[f].push(ak[f])
 end
 end
-[old lines 169-214 not captured]
-name=p.join(" - ")
-row=[name, sprintf("%0.1f",td[p]), sprintf("%0.1f",cd[p]), sprintf("%0.1f",gd[p])]
-t.add_row(row)
-}
-out.parse_table(t)
-return out
-end
-class ModelData
-attr_reader :contributions
-def initialize(name,ds,y_var,fields,r_class)
-@name=name
-@fields=fields
-@contributions=@fields.inject({}){|a,v| a[v]=nil;a}
-r_class=Regression::Multiple::RubyEngine if r_class.nil?
-@lr=r_class.new(ds,y_var)
-end
-def add_contribution(f,v)
-@contributions[f]=v-r2
-end
-def r2
-@lr.r2
-end
-def add_table_row
-begin
-sign=sprintf("%0.3f", @lr.significance)
-rescue RuntimeError
-sign="???"
-end
-[@name.join("*"), sprintf("%0.3f",r2), sign] + @fields.collect{|k|
-v=@contributions[k]
-if v.nil?
-"--"
-else
-sprintf("%0.3f",v)
-end
-}
-end
-def summary
-out=sprintf("%s: r2=%0.3f(p=%0.2f)\n",@name.join("*"),r2,@lr.significance,@lr.sst)
-out << @fields.collect{|k|
-v=@contributions[k]
-if v.nil?
-"--"
-else
-sprintf("%s=%0.3f",k,v)
-end
-}.join(" | ")
-out << "\n"
-
-return out
-end
-end
+@general_averages=get_averages(averages)
+end
+@general_averages
+end
+def create_models
+@models=[]
+@models_data={}
+for i in 1..@fields.size
+c=Statsample::Combination.new(i,@fields.size)
+c.each do |data|
+convert=data.collect {|i1| @fields[i1] }
+@models.push(convert)
+ds_prev=@ds.dup(convert+[@y_var])
+modeldata=ModelData.new(convert,ds_prev, @y_var, @fields, @r_class)
+@models_data[convert.sort]=modeldata
+end
+end
+end
+def summary(report_type=ConsoleSummary)
+out=""
+out.extend report_type
+out << _("Summary for Dominance Analysis of %s on %s\n") % [@fields.join(", "),@y_var]
+t=Statsample::ReportTable.new
+t.header=["","r2","sign"]+@fields
+row=[_("Model 0"),"",""]+@fields.collect{|f|
+sprintf("%0.3f", md([f]).r2)
+}
+t.add_row(row)
+t.add_horizontal_line
+for i in 1..@fields.size
+mk=md_k(i)
+mk.each{|m|
+t.add_row(m.add_table_row)
+}
+# Report averages
+a=average_k(i)
+if !a.nil?
+t.add_horizontal_line
+row=[_("k=%d Average") % i,"",""] + @fields.collect{|f|
+sprintf("%0.3f",a[f])
+}
+t.add_row(row)
+t.add_horizontal_line
+
+end
+
 end
 
+g=general_averages
+t.add_horizontal_line
+
+row=[_("Overall averages"),"",""]+@fields.collect{|f|
+sprintf("%0.3f",g[f])
+}
+t.add_row(row)
+out.parse_table(t)
+
+out.nl
+out << _("Pairwise")+"\n"
+td=total_dominance
+cd=conditional_dominance
+gd=general_dominance
+t=Statsample::ReportTable.new([_("Pairs"),"T","C","G"])
+pairs.each{|p|
+name=p.join(" - ")
+row=[name, sprintf("%0.1f",td[p]), sprintf("%0.1f",cd[p]), sprintf("%0.1f",gd[p])]
+t.add_row(row)
+}
+out.parse_table(t)
+return out
+end
+class ModelData
+attr_reader :contributions
+def initialize(name,ds,y_var,fields,r_class)
+@name=name
+@fields=fields
+@contributions=@fields.inject({}){|a,v| a[v]=nil;a}
+r_class=Regression::Multiple::RubyEngine if r_class.nil?
+@lr=r_class.new(ds,y_var)
+end
+def add_contribution(f,v)
+@contributions[f]=v-r2
+end
+def r2
+@lr.r2
+end
+def add_table_row
+begin
+sign=sprintf("%0.3f", @lr.significance)
+rescue RuntimeError
+sign="???"
+end
+[@name.join("*"), sprintf("%0.3f",r2), sign] + @fields.collect{|k|
+v=@contributions[k]
+if v.nil?
+"--"
+else
+sprintf("%0.3f",v)
+end
+}
+end
+def summary
+out=sprintf("%s: r2=%0.3f(p=%0.2f)\n",@name.join("*"),r2,@lr.significance,@lr.sst)
+out << @fields.collect{|k|
+v=@contributions[k]
+if v.nil?
+"--"
+else
+sprintf("%s=%0.3f",k,v)
+end
+}.join(" | ")
+out << "\n"
+return out
+end
+end # end ModelData
+end # end Dominance Analysis
 end
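
All three new *_dominance_pairwise methods above reduce to the same decision rule: collect a 1/0/0.5 verdict for each comparison of the additional R2 contributions of predictors i and j, and report complete dominance only when every verdict agrees. A standalone Ruby sketch of that rule, independent of the gem's classes:

    # verdicts: one entry per submodel comparison of i against j
    #   1   => i added more R2 than j
    #   0   => j added more R2 than i
    #   0.5 => tie or undetermined
    def dominance_verdict(verdicts)
      final = verdicts.uniq
      final.size > 1 ? 0.5 : final[0]  # any disagreement yields 0.5 (undetermined)
    end

    dominance_verdict([1, 1, 1])  # => 1   (i dominates j in every comparison)
    dominance_verdict([1, 0, 1])  # => 0.5 (mixed evidence)
    dominance_verdict([0, 0, 0])  # => 0   (j dominates i)
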
@@ -1,5 +1,8 @@
 module Statsample
 class DominanceAnalysis
+# Generates Bootstrap sample to identity the replicability of a Dominance Analysis. See Azen & Bodescu (2003) for more information.
+# References:
+# * Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. _Psychological Methods, 8_(2), 129-148.
 class Bootstrap
 include GetText
 include Writable
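
The comment added above describes Bootstrap as a way of resampling the dataset to check how replicable the dominance results are (Azen & Budescu, 2003). A hypothetical sketch of how it might be invoked; the constructor arguments, the bootstrap(100) call and the summary call are assumptions about this class's API, not something shown in the hunk:

    require 'statsample'
    a  = 300.times.collect { rand }.to_scale
    b  = 300.times.collect { rand }.to_scale
    ds = {'a' => a, 'b' => b}.to_dataset
    ds['y'] = ds.collect { |row| row['a'] * 2 + row['b'] + rand }
    # Assumed interface: build the object, draw 100 bootstrap samples, report.
    dab = Statsample::DominanceAnalysis::Bootstrap.new(ds, 'y')
    dab.bootstrap(100)
    puts dab.summary
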
@@ -3,18 +3,44 @@ module Statsample
 module Regression
 # Module for Linear Multiple Regression Analysis.
 #
-# You can call Regression::Multiple.listwise
+# You can call Statsample::Regression::Multiple.listwise, Statsample::Regression::Multiple.pairwise or instance directly the engines.
 #
-#
+# Use:.
 #
 # require 'statsample'
-#
-#
-#
-#
-# ds={'a'
-# lr=Statsample::Regression::Multiple.listwise(ds,'y')
-#
+# a=1000.times.collect {rand}.to_scale
+# b=1000.times.collect {rand}.to_scale
+# c=1000.times.collect {rand}.to_scale
+# ds={'a'=>a,'b'=>b,'c'=>c}.to_dataset
+# ds['y']=ds.collect{|row| row['a']*5+row['b']*3+row['c']*2+rand()}
+# lr=Statsample::Regression::Multiple.listwise(ds,'y')
+# puts lr.summary
+# Summary for regression of a,b,c over y
+# *************************************************************
+# Engine: Statsample::Regression::Multiple::AlglibEngine
+# Cases(listwise)=1000(1000)
+# r=0.986
+# r2=0.973
+# Equation=0.504+5.011a + 2.995b + 1.988c
+# ----------------------------
+# ANOVA TABLE
+# --------------------------------------------------------------
+# | source | ss | df | ms | f | s |
+# --------------------------------------------------------------
+# | Regression | 2979.321 | 3 | 993.107 | 12040.067 | 0.000 |
+# | Error | 82.154 | 996 | 0.082 | | |
+# | Total | 3061.475 | 999 | | | |
+# --------------------------------------------------------------
+# Beta coefficientes
+# -----------------------------------------------
+# | coeff | b | beta | se | t |
+# -----------------------------------------------
+# | Constant | 0.504 | - | 0.030 | 16.968 |
+# | a | 5.011 | 0.832 | 0.031 | 159.486 |
+# | b | 2.995 | 0.492 | 0.032 | 94.367 |
+# | c | 1.988 | 0.323 | 0.032 | 62.132 |
+# -----------------------------------------------
+#
 module Multiple
 # Creates an object for listwise regression.
 # Alglib is faster, so is prefered over GSL
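
Besides the listwise example embedded in the comment, the updated text mentions Statsample::Regression::Multiple.pairwise and the option of instantiating an engine directly. A short sketch of those two alternatives on the same kind of data (the pairwise call and the RubyEngine class come from this diff; the summary and r2 readers mirror the listwise example):

    require 'statsample'
    # Same construction as in the comment's example above.
    a = 1000.times.collect { rand }.to_scale
    b = 1000.times.collect { rand }.to_scale
    c = 1000.times.collect { rand }.to_scale
    ds = {'a' => a, 'b' => b, 'c' => c}.to_dataset
    ds['y'] = ds.collect { |row| row['a'] * 5 + row['b'] * 3 + row['c'] * 2 + rand }
    # Pairwise deletion of missing data instead of listwise:
    lr_pairwise = Statsample::Regression::Multiple.pairwise(ds, 'y')
    puts lr_pairwise.summary
    # Or use a specific engine directly (RubyEngine is the pure-Ruby fallback):
    lr_ruby = Statsample::Regression::Multiple::RubyEngine.new(ds, 'y')
    puts lr_ruby.r2
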
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: statsample
 version: !ruby/object:Gem::Version
-version: 0.6.
+version: 0.6.1
 platform: ruby
 authors:
 - Claudio Bustos
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
 
-date: 2010-02-
+date: 2010-02-08 00:00:00 -03:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency