statsample 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
=== 0.6.1 / 2010-02-08
|
2
|
+
* Bug fix on DominanceAnalysis summary for Ruby1.9
|
3
|
+
* Some extra documentation
|
1
4
|
=== 0.6.0 / 2010-02-05
|
2
5
|
* New Statsample::Factor module. Include classes for extracting factors (Statsample::Factor::PCA and Statsample::Factor::PrincipalAxis) and rotate component matrix ( Statsample::Factor::Rotation subclasses). For now, only orthogonal rotations
|
3
6
|
* New Statsample::Dataset.crosstab_with_asignation, Statsample::Dataset.one_to_many
|
data/lib/statsample.rb
CHANGED
@@ -27,11 +27,11 @@ module Statsample
|
|
27
27
|
# See http://www.john-uebersax.com/stat/tetra.htm for extensive
|
28
28
|
# documentation about tetrachoric correlation.
|
29
29
|
#
|
30
|
-
# This class uses algorithm
|
31
|
-
# vol.26, no.3.
|
32
|
-
#
|
33
|
-
# You can see FORTRAN code on http://lib.stat.cmu.edu/apstat/116
|
30
|
+
# This class uses the Brown (1977) algorithm. You can see the FORTRAN code at http://lib.stat.cmu.edu/apstat/116
|
34
31
|
#
|
32
|
+
# == References:
|
33
|
+
# * Brown, MB. (1977) Algorithm AS 116: the tetrachoric correlation and its standard error. _Applied Statistics, 26_, 343-351.
|
34
|
+
#
|
35
35
|
# <b>Usage</b>.
|
36
36
|
# With two variables x and y on a crosstab like this:
|
37
37
|
#
|
@@ -1,269 +1,321 @@
|
|
1
1
|
require 'statsample/dominanceanalysis/bootstrap'
|
2
2
|
module Statsample
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
3
|
+
# Dominance Analysis is a procedure based on an examination of the R2 values
|
4
|
+
# for all possible subset models, to identify the relevance of one or more
|
5
|
+
# predictors in the prediction of the criterion.
|
6
|
+
#
|
7
|
+
# See Budescu(1993) and Azen & Budescu (2003) for more information.
|
8
|
+
# Use:
|
9
|
+
# a=1000.times.collect {rand}.to_scale
|
10
|
+
# b=1000.times.collect {rand}.to_scale
|
11
|
+
# c=1000.times.collect {rand}.to_scale
|
12
|
+
# ds={'a'=>a,'b'=>b,'c'=>c}.to_dataset
|
13
|
+
# ds['y']=ds.collect{|row| row['a']*5+row['b']*3+row['c']*2+rand()}
|
14
|
+
# da=Statsample::DominanceAnalysis.new(ds,'y')
|
15
|
+
# puts da.summary
|
16
|
+
# ==>
|
17
|
+
# Resultado del Analisis de Dominancia de a, b, c en y
|
18
|
+
#
|
19
|
+
# ----------------------------------------------------------------
|
20
|
+
# | | r2 | sign | a | b | c |
|
21
|
+
# ----------------------------------------------------------------
|
22
|
+
# | Modelo 0 | | | 0.637 | 0.260 | 0.115 |
|
23
|
+
# ----------------------------------------------------------------
|
24
|
+
# | a | 0.637 | 0.000 | -- | 0.239 | 0.109 |
|
25
|
+
# | b | 0.260 | 0.000 | 0.617 | -- | 0.103 |
|
26
|
+
# | c | 0.115 | 0.000 | 0.632 | 0.249 | -- |
|
27
|
+
# ----------------------------------------------------------------
|
28
|
+
# | k=1 Promedio | | | 0.624 | 0.244 | 0.106 |
|
29
|
+
# ----------------------------------------------------------------
|
30
|
+
# | a*b | 0.877 | 0.000 | -- | -- | 0.098 |
|
31
|
+
# | a*c | 0.746 | 0.000 | -- | 0.229 | -- |
|
32
|
+
# | b*c | 0.363 | 0.000 | 0.612 | -- | -- |
|
33
|
+
# ----------------------------------------------------------------
|
34
|
+
# | k=2 Promedio | | | 0.612 | 0.229 | 0.098 |
|
35
|
+
# ----------------------------------------------------------------
|
36
|
+
# | a*b*c | 0.975 | 0.000 | -- | -- | -- |
|
37
|
+
# ----------------------------------------------------------------
|
38
|
+
# | Promedios generales | | | 0.624 | 0.244 | 0.106 |
|
39
|
+
# ----------------------------------------------------------------
|
40
|
+
#
|
41
|
+
# De a pares
|
42
|
+
#
|
43
|
+
# ----------------------------
|
44
|
+
# | Pares | T | C | G |
|
45
|
+
# ----------------------------
|
46
|
+
# | a - b | 1.0 | 1.0 | 1.0 |
|
47
|
+
# | a - c | 1.0 | 1.0 | 1.0 |
|
48
|
+
# | b - c | 1.0 | 1.0 | 1.0 |
|
49
|
+
# ----------------------------
|
50
|
+
#
|
51
|
+
# == References:
|
52
|
+
# * Budescu, D. V. (1993). Dominance analysis: a new approach to the problem of relative importance of predictors in multiple regression. _Psychological Bulletin, 114_, 542-551.
|
53
|
+
# * Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. _Psychological Methods, 8_(2), 129-148.
|
54
|
+
class DominanceAnalysis
|
55
|
+
include GetText
|
56
|
+
bindtextdomain("statsample")
|
57
|
+
# Creates a new DominanceAnalysis object
|
58
|
+
# Params:
|
59
|
+
# * ds: A Dataset object
|
60
|
+
# * y_var: Name of dependent variable
|
61
|
+
# * r_class: Class to generate the regressions. Could be any subclass of
|
62
|
+
# Statsample::Regression::Multiple::BaseEngine
|
63
|
+
#
|
64
|
+
def initialize(ds,y_var, r_class = Regression::Multiple::RubyEngine)
|
65
|
+
@y_var=y_var
|
66
|
+
@dy=ds[@y_var]
|
67
|
+
@ds=ds
|
68
|
+
@r_class=r_class
|
69
|
+
@ds_indep=ds.dup(ds.fields-[y_var])
|
70
|
+
@fields=@ds_indep.fields
|
71
|
+
create_models
|
72
|
+
fill_models
|
73
|
+
end
|
74
|
+
def fill_models
|
75
|
+
@models.each do |m|
|
76
|
+
@fields.each do |f|
|
77
|
+
next if m.include? f
|
78
|
+
base_model=md(m)
|
79
|
+
comp_model=md(m+[f])
|
80
|
+
base_model.add_contribution(f,comp_model.r2)
|
74
81
|
end
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
+
end
|
83
|
+
end
|
84
|
+
def dominance_for_nil_model(i,j)
|
85
|
+
if md([i]).r2>md([j]).r2
|
86
|
+
1
|
87
|
+
elsif md([i]).r2<md([j]).r2
|
88
|
+
0
|
89
|
+
else
|
90
|
+
0.5
|
91
|
+
end
|
92
|
+
end
|
93
|
+
# Returns 1 if i dominates j, 0 if j dominates i and 0.5 if undetermined
|
94
|
+
def total_dominance_pairwise(i,j)
|
95
|
+
dm=dominance_for_nil_model(i,j)
|
96
|
+
return 0.5 if dm==0.5
|
97
|
+
dominances=[dm]
|
98
|
+
@models_data.each do |k,m|
|
99
|
+
if !m.contributions[i].nil? and !m.contributions[j].nil?
|
100
|
+
if m.contributions[i]>m.contributions[j]
|
101
|
+
dominances.push(1)
|
102
|
+
elsif m.contributions[i]<m.contributions[j]
|
103
|
+
dominances.push(0)
|
82
104
|
else
|
83
|
-
|
84
|
-
|
85
|
-
end
|
86
|
-
def pairs
|
87
|
-
@models.find_all{|m| m.size==2}
|
88
|
-
end
|
89
|
-
def total_dominance
|
90
|
-
pairs.inject({}){|a,pair|
|
91
|
-
a[pair]=total_dominance_pairwise(pair[0], pair[1])
|
92
|
-
a
|
93
|
-
}
|
94
|
-
end
|
95
|
-
def conditional_dominance
|
96
|
-
pairs.inject({}){|a,pair|
|
97
|
-
a[pair]=conditional_dominance_pairwise(pair[0], pair[1])
|
98
|
-
a
|
99
|
-
}
|
100
|
-
end
|
101
|
-
def general_dominance
|
102
|
-
pairs.inject({}){|a,pair|
|
103
|
-
a[pair]=general_dominance_pairwise(pair[0], pair[1])
|
104
|
-
a
|
105
|
-
}
|
106
|
-
end
|
107
|
-
|
108
|
-
def md(m)
|
109
|
-
@models_data[m.sort]
|
110
|
-
end
|
111
|
-
# Get all model of size k
|
112
|
-
def md_k(k)
|
113
|
-
out=[]
|
114
|
-
models=@models.each{|m| out.push(md(m)) if m.size==k }
|
115
|
-
out
|
116
|
-
end
|
117
|
-
|
118
|
-
# For a hash with arrays of numbers as values
|
119
|
-
# Returns a hash with same keys and
|
120
|
-
# value as the mean of values of original hash
|
121
|
-
|
122
|
-
def get_averages(averages)
|
123
|
-
out={}
|
124
|
-
averages.each{|key,val| out[key]=val.to_vector(:scale).mean }
|
125
|
-
out
|
126
|
-
end
|
127
|
-
# Hash with average for each k size
|
128
|
-
# model
|
129
|
-
def average_k(k)
|
130
|
-
return nil if k==@fields.size
|
131
|
-
models=md_k(k)
|
132
|
-
averages=@fields.inject({}) {|a,v| a[v]=[];a}
|
133
|
-
models.each do |m|
|
134
|
-
@fields.each do |f|
|
135
|
-
averages[f].push(m.contributions[f]) unless m.contributions[f].nil?
|
136
|
-
end
|
105
|
+
return 0.5
|
106
|
+
#dominances.push(0.5)
|
137
107
|
end
|
138
|
-
get_averages(averages)
|
139
108
|
end
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
109
|
+
end
|
110
|
+
final=dominances.uniq
|
111
|
+
final.size>1 ? 0.5 : final[0]
|
112
|
+
end
|
113
|
+
|
114
|
+
# Returns 1 if i cD j, 0 if j cD i and 0.5 if undetermined
|
115
|
+
def conditional_dominance_pairwise(i,j)
|
116
|
+
dm=dominance_for_nil_model(i,j)
|
117
|
+
return 0.5 if dm==0.5
|
118
|
+
dominances=[dm]
|
119
|
+
for k in 1...@fields.size
|
120
|
+
a=average_k(k)
|
121
|
+
if a[i]>a[j]
|
122
|
+
dominances.push(1)
|
123
|
+
elsif a[i]<a[j]
|
124
|
+
dominances.push(0)
|
125
|
+
else
|
126
|
+
return 0.5
|
127
|
+
dominances.push(0.5)
|
128
|
+
end
|
129
|
+
end
|
130
|
+
final=dominances.uniq
|
131
|
+
final.size>1 ? 0.5 : final[0]
|
132
|
+
end
|
133
|
+
# Returns 1 if i gD j, 0 if j gD i and 0.5 if undetermined
|
134
|
+
def general_dominance_pairwise(i,j)
|
135
|
+
ga=general_averages
|
136
|
+
if ga[i]>ga[j]
|
137
|
+
1
|
138
|
+
elsif ga[i]<ga[j]
|
139
|
+
0
|
140
|
+
else
|
141
|
+
0.5
|
142
|
+
end
|
143
|
+
end
|
144
|
+
def pairs
|
145
|
+
@models.find_all{|m| m.size==2}
|
146
|
+
end
|
147
|
+
def total_dominance
|
148
|
+
pairs.inject({}){|a,pair| a[pair]=total_dominance_pairwise(pair[0], pair[1])
|
149
|
+
a
|
150
|
+
}
|
151
|
+
end
|
152
|
+
def conditional_dominance
|
153
|
+
pairs.inject({}){|a,pair|
|
154
|
+
a[pair]=conditional_dominance_pairwise(pair[0], pair[1])
|
155
|
+
a
|
156
|
+
}
|
157
|
+
end
|
158
|
+
def general_dominance
|
159
|
+
pairs.inject({}){|a,pair|
|
160
|
+
a[pair]=general_dominance_pairwise(pair[0], pair[1])
|
161
|
+
a
|
162
|
+
}
|
163
|
+
end
|
164
|
+
|
165
|
+
def md(m)
|
166
|
+
@models_data[m.sort]
|
167
|
+
end
|
168
|
+
# Get all models of size k
|
169
|
+
def md_k(k)
|
170
|
+
out=[]
|
171
|
+
models=@models.each{|m| out.push(md(m)) if m.size==k }
|
172
|
+
out
|
173
|
+
end
|
174
|
+
|
175
|
+
# For a hash with arrays of numbers as values
|
176
|
+
# Returns a hash with same keys and
|
177
|
+
# value as the mean of values of original hash
|
178
|
+
|
179
|
+
def get_averages(averages)
|
180
|
+
out={}
|
181
|
+
averages.each{|key,val| out[key]=val.to_vector(:scale).mean }
|
182
|
+
out
|
183
|
+
end
|
184
|
+
# Hash with average for each k size model.
|
185
|
+
def average_k(k)
|
186
|
+
return nil if k==@fields.size
|
187
|
+
models=md_k(k)
|
188
|
+
averages=@fields.inject({}) {|a,v| a[v]=[];a}
|
189
|
+
models.each do |m|
|
190
|
+
@fields.each do |f|
|
191
|
+
averages[f].push(m.contributions[f]) unless m.contributions[f].nil?
|
152
192
|
end
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
ds_prev=@ds.dup(convert+[@y_var])
|
164
|
-
modeldata=ModelData.new(convert,ds_prev, @y_var, @fields, @r_class)
|
165
|
-
@models_data[convert.sort]=modeldata
|
166
|
-
}
|
193
|
+
end
|
194
|
+
get_averages(averages)
|
195
|
+
end
|
196
|
+
def general_averages
|
197
|
+
if @general_averages.nil?
|
198
|
+
averages=@fields.inject({}) {|a,v| a[v]=[md([v]).r2];a}
|
199
|
+
for k in 1...@fields.size
|
200
|
+
ak=average_k(k)
|
201
|
+
@fields.each do |f|
|
202
|
+
averages[f].push(ak[f])
|
167
203
|
end
|
168
204
|
end
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
name=p.join(" - ")
|
216
|
-
row=[name, sprintf("%0.1f",td[p]), sprintf("%0.1f",cd[p]), sprintf("%0.1f",gd[p])]
|
217
|
-
t.add_row(row)
|
218
|
-
}
|
219
|
-
out.parse_table(t)
|
220
|
-
return out
|
221
|
-
end
|
222
|
-
class ModelData
|
223
|
-
attr_reader :contributions
|
224
|
-
def initialize(name,ds,y_var,fields,r_class)
|
225
|
-
@name=name
|
226
|
-
@fields=fields
|
227
|
-
@contributions=@fields.inject({}){|a,v| a[v]=nil;a}
|
228
|
-
r_class=Regression::Multiple::RubyEngine if r_class.nil?
|
229
|
-
@lr=r_class.new(ds,y_var)
|
230
|
-
end
|
231
|
-
def add_contribution(f,v)
|
232
|
-
@contributions[f]=v-r2
|
233
|
-
end
|
234
|
-
def r2
|
235
|
-
@lr.r2
|
236
|
-
end
|
237
|
-
def add_table_row
|
238
|
-
begin
|
239
|
-
sign=sprintf("%0.3f", @lr.significance)
|
240
|
-
rescue RuntimeError
|
241
|
-
sign="???"
|
242
|
-
end
|
243
|
-
[@name.join("*"), sprintf("%0.3f",r2), sign] + @fields.collect{|k|
|
244
|
-
v=@contributions[k]
|
245
|
-
if v.nil?
|
246
|
-
"--"
|
247
|
-
else
|
248
|
-
sprintf("%0.3f",v)
|
249
|
-
end
|
250
|
-
}
|
251
|
-
end
|
252
|
-
def summary
|
253
|
-
out=sprintf("%s: r2=%0.3f(p=%0.2f)\n",@name.join("*"),r2,@lr.significance,@lr.sst)
|
254
|
-
out << @fields.collect{|k|
|
255
|
-
v=@contributions[k]
|
256
|
-
if v.nil?
|
257
|
-
"--"
|
258
|
-
else
|
259
|
-
sprintf("%s=%0.3f",k,v)
|
260
|
-
end
|
261
|
-
}.join(" | ")
|
262
|
-
out << "\n"
|
263
|
-
|
264
|
-
return out
|
265
|
-
end
|
266
|
-
end
|
205
|
+
@general_averages=get_averages(averages)
|
206
|
+
end
|
207
|
+
@general_averages
|
208
|
+
end
|
209
|
+
def create_models
|
210
|
+
@models=[]
|
211
|
+
@models_data={}
|
212
|
+
for i in 1..@fields.size
|
213
|
+
c=Statsample::Combination.new(i,@fields.size)
|
214
|
+
c.each do |data|
|
215
|
+
convert=data.collect {|i1| @fields[i1] }
|
216
|
+
@models.push(convert)
|
217
|
+
ds_prev=@ds.dup(convert+[@y_var])
|
218
|
+
modeldata=ModelData.new(convert,ds_prev, @y_var, @fields, @r_class)
|
219
|
+
@models_data[convert.sort]=modeldata
|
220
|
+
end
|
221
|
+
end
|
222
|
+
end
|
223
|
+
def summary(report_type=ConsoleSummary)
|
224
|
+
out=""
|
225
|
+
out.extend report_type
|
226
|
+
out << _("Summary for Dominance Analysis of %s on %s\n") % [@fields.join(", "),@y_var]
|
227
|
+
t=Statsample::ReportTable.new
|
228
|
+
t.header=["","r2","sign"]+@fields
|
229
|
+
row=[_("Model 0"),"",""]+@fields.collect{|f|
|
230
|
+
sprintf("%0.3f", md([f]).r2)
|
231
|
+
}
|
232
|
+
t.add_row(row)
|
233
|
+
t.add_horizontal_line
|
234
|
+
for i in 1..@fields.size
|
235
|
+
mk=md_k(i)
|
236
|
+
mk.each{|m|
|
237
|
+
t.add_row(m.add_table_row)
|
238
|
+
}
|
239
|
+
# Report averages
|
240
|
+
a=average_k(i)
|
241
|
+
if !a.nil?
|
242
|
+
t.add_horizontal_line
|
243
|
+
row=[_("k=%d Average") % i,"",""] + @fields.collect{|f|
|
244
|
+
sprintf("%0.3f",a[f])
|
245
|
+
}
|
246
|
+
t.add_row(row)
|
247
|
+
t.add_horizontal_line
|
248
|
+
|
249
|
+
end
|
250
|
+
|
267
251
|
end
|
268
252
|
|
253
|
+
g=general_averages
|
254
|
+
t.add_horizontal_line
|
255
|
+
|
256
|
+
row=[_("Overall averages"),"",""]+@fields.collect{|f|
|
257
|
+
sprintf("%0.3f",g[f])
|
258
|
+
}
|
259
|
+
t.add_row(row)
|
260
|
+
out.parse_table(t)
|
261
|
+
|
262
|
+
out.nl
|
263
|
+
out << _("Pairwise")+"\n"
|
264
|
+
td=total_dominance
|
265
|
+
cd=conditional_dominance
|
266
|
+
gd=general_dominance
|
267
|
+
t=Statsample::ReportTable.new([_("Pairs"),"T","C","G"])
|
268
|
+
pairs.each{|p|
|
269
|
+
name=p.join(" - ")
|
270
|
+
row=[name, sprintf("%0.1f",td[p]), sprintf("%0.1f",cd[p]), sprintf("%0.1f",gd[p])]
|
271
|
+
t.add_row(row)
|
272
|
+
}
|
273
|
+
out.parse_table(t)
|
274
|
+
return out
|
275
|
+
end
|
276
|
+
class ModelData
|
277
|
+
attr_reader :contributions
|
278
|
+
def initialize(name,ds,y_var,fields,r_class)
|
279
|
+
@name=name
|
280
|
+
@fields=fields
|
281
|
+
@contributions=@fields.inject({}){|a,v| a[v]=nil;a}
|
282
|
+
r_class=Regression::Multiple::RubyEngine if r_class.nil?
|
283
|
+
@lr=r_class.new(ds,y_var)
|
284
|
+
end
|
285
|
+
def add_contribution(f,v)
|
286
|
+
@contributions[f]=v-r2
|
287
|
+
end
|
288
|
+
def r2
|
289
|
+
@lr.r2
|
290
|
+
end
|
291
|
+
def add_table_row
|
292
|
+
begin
|
293
|
+
sign=sprintf("%0.3f", @lr.significance)
|
294
|
+
rescue RuntimeError
|
295
|
+
sign="???"
|
296
|
+
end
|
297
|
+
[@name.join("*"), sprintf("%0.3f",r2), sign] + @fields.collect{|k|
|
298
|
+
v=@contributions[k]
|
299
|
+
if v.nil?
|
300
|
+
"--"
|
301
|
+
else
|
302
|
+
sprintf("%0.3f",v)
|
303
|
+
end
|
304
|
+
}
|
305
|
+
end
|
306
|
+
def summary
|
307
|
+
out=sprintf("%s: r2=%0.3f(p=%0.2f)\n",@name.join("*"),r2,@lr.significance,@lr.sst)
|
308
|
+
out << @fields.collect{|k|
|
309
|
+
v=@contributions[k]
|
310
|
+
if v.nil?
|
311
|
+
"--"
|
312
|
+
else
|
313
|
+
sprintf("%s=%0.3f",k,v)
|
314
|
+
end
|
315
|
+
}.join(" | ")
|
316
|
+
out << "\n"
|
317
|
+
return out
|
318
|
+
end
|
319
|
+
end # end ModelData
|
320
|
+
end # end Dominance Analysis
|
269
321
|
end
|
@@ -1,5 +1,8 @@
|
|
1
1
|
module Statsample
|
2
2
|
class DominanceAnalysis
|
3
|
+
# Generates Bootstrap samples to identify the replicability of a Dominance Analysis. See Azen & Budescu (2003) for more information.
|
4
|
+
# References:
|
5
|
+
# * Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. _Psychological Methods, 8_(2), 129-148.
|
3
6
|
class Bootstrap
|
4
7
|
include GetText
|
5
8
|
include Writable
|
@@ -3,18 +3,44 @@ module Statsample
|
|
3
3
|
module Regression
|
4
4
|
# Module for Linear Multiple Regression Analysis.
|
5
5
|
#
|
6
|
-
# You can call Regression::Multiple.listwise
|
6
|
+
# You can call Statsample::Regression::Multiple.listwise, Statsample::Regression::Multiple.pairwise or instantiate the engines directly.
|
7
7
|
#
|
8
|
-
#
|
8
|
+
# Use:
|
9
9
|
#
|
10
10
|
# require 'statsample'
|
11
|
-
#
|
12
|
-
#
|
13
|
-
#
|
14
|
-
#
|
15
|
-
# ds={'a'
|
16
|
-
# lr=Statsample::Regression::Multiple.listwise(ds,'y')
|
17
|
-
#
|
11
|
+
# a=1000.times.collect {rand}.to_scale
|
12
|
+
# b=1000.times.collect {rand}.to_scale
|
13
|
+
# c=1000.times.collect {rand}.to_scale
|
14
|
+
# ds={'a'=>a,'b'=>b,'c'=>c}.to_dataset
|
15
|
+
# ds['y']=ds.collect{|row| row['a']*5+row['b']*3+row['c']*2+rand()}
|
16
|
+
# lr=Statsample::Regression::Multiple.listwise(ds,'y')
|
17
|
+
# puts lr.summary
|
18
|
+
# Summary for regression of a,b,c over y
|
19
|
+
# *************************************************************
|
20
|
+
# Engine: Statsample::Regression::Multiple::AlglibEngine
|
21
|
+
# Cases(listwise)=1000(1000)
|
22
|
+
# r=0.986
|
23
|
+
# r2=0.973
|
24
|
+
# Equation=0.504+5.011a + 2.995b + 1.988c
|
25
|
+
# ----------------------------
|
26
|
+
# ANOVA TABLE
|
27
|
+
# --------------------------------------------------------------
|
28
|
+
# | source | ss | df | ms | f | s |
|
29
|
+
# --------------------------------------------------------------
|
30
|
+
# | Regression | 2979.321 | 3 | 993.107 | 12040.067 | 0.000 |
|
31
|
+
# | Error | 82.154 | 996 | 0.082 | | |
|
32
|
+
# | Total | 3061.475 | 999 | | | |
|
33
|
+
# --------------------------------------------------------------
|
34
|
+
# Beta coefficientes
|
35
|
+
# -----------------------------------------------
|
36
|
+
# | coeff | b | beta | se | t |
|
37
|
+
# -----------------------------------------------
|
38
|
+
# | Constant | 0.504 | - | 0.030 | 16.968 |
|
39
|
+
# | a | 5.011 | 0.832 | 0.031 | 159.486 |
|
40
|
+
# | b | 2.995 | 0.492 | 0.032 | 94.367 |
|
41
|
+
# | c | 1.988 | 0.323 | 0.032 | 62.132 |
|
42
|
+
# -----------------------------------------------
|
43
|
+
#
|
18
44
|
module Multiple
|
19
45
|
# Creates an object for listwise regression.
|
20
46
|
# Alglib is faster, so it is preferred over GSL
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: statsample
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.
|
4
|
+
version: 0.6.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Claudio Bustos
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-02-
|
12
|
+
date: 2010-02-08 00:00:00 -03:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|