statsample 0.6.0 → 0.6.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,6 @@
1
+ === 0.6.1 / 2010-02-08
2
+ * Bug fix on DominanceAnalysis summary for Ruby1.9
3
+ * Some extra documentation
1
4
  === 0.6.0 / 2010-02-05
2
5
  * New Statsample::Factor module. Include classes for extracting factors (Statsample::Factor::PCA and Statsample::Factor::PrincipalAxis) and rotate component matrix ( Statsample::Factor::Rotation subclasses). For now, only orthogonal rotations
3
6
  * New Statsample::Dataset.crosstab_with_asignation, Statsample::Dataset.one_to_many
@@ -108,7 +108,7 @@ end
108
108
  # * Dataset: A union of vectors.
109
109
  #
110
110
  module Statsample
111
- VERSION = '0.6.0'
111
+ VERSION = '0.6.1'
112
112
  SPLIT_TOKEN = ","
113
113
  autoload(:Database, 'statsample/converters')
114
114
  autoload(:Anova, 'statsample/anova')
@@ -27,11 +27,11 @@ module Statsample
27
27
  # See http://www.john-uebersax.com/stat/tetra.htm for extensive
28
28
  # documentation about tetrachoric correlation.
29
29
  #
30
- # This class uses algorithm AS116 from Applied Statistics(1977)
31
- # vol.26, no.3.
32
- #
33
- # You can see FORTRAN code on http://lib.stat.cmu.edu/apstat/116
30
+ # This class uses the Brown (1977) algorithm. You can see the FORTRAN code at http://lib.stat.cmu.edu/apstat/116
34
31
  #
32
+ # == References:
33
+ # * Brown, MB. (1977) Algorithm AS 116: the tetrachoric correlation and its standard error. _Applied Statistics, 26_, 343-351.
34
+ #
35
35
  # <b>Usage</b>.
36
36
  # With two variables x and y on a crosstab like this:
37
37
  #
@@ -1,269 +1,321 @@
1
1
  require 'statsample/dominanceanalysis/bootstrap'
2
2
  module Statsample
3
- class DominanceAnalysis
4
- include GetText
5
- bindtextdomain("statsample")
6
- def initialize(ds,y_var, r_class = Regression::Multiple::RubyEngine)
7
- @y_var=y_var
8
- @dy=ds[@y_var]
9
- @ds=ds
10
- @r_class=r_class
11
- @ds_indep=ds.dup(ds.fields-[y_var])
12
- @fields=@ds_indep.fields
13
- create_models
14
- fill_models
15
- end
16
- def fill_models
17
- @models.each{|m|
18
- @fields.each{|f|
19
- next if m.include? f
20
- base_model=md(m)
21
- comp_model=md(m+[f])
22
- base_model.add_contribution(f,comp_model.r2)
23
- }
24
- }
25
- end
26
- def dominance_for_nil_model(i,j)
27
- if md(i).r2>md(j).r2
28
- 1
29
- elsif md(i).r2<md(j).r2
30
- 0
31
- else
32
- 0.5
33
- end
34
- end
35
- # Returns 1 if i D k, 0 if j dominates i and 0.5 if undetermined
36
- def total_dominance_pairwise(i,j)
37
- dm=dominance_for_nil_model(i,j)
38
- return 0.5 if dm==0.5
39
- dominances=[dm]
40
- @models_data.each{|k,m|
41
- if !m.contributions[i].nil? and !m.contributions[j].nil?
42
- if m.contributions[i]>m.contributions[j]
43
- dominances.push(1)
44
- elsif m.contributions[i]<m.contributions[j]
45
- dominances.push(0)
46
- else
47
- return 0.5
48
- #dominances.push(0.5)
49
- end
50
- end
51
- }
52
- final=dominances.uniq
53
- final.size>1 ? 0.5 : final[0]
54
- end
55
-
56
- # Returns 1 if i cD k, 0 if j cD i and 0.5 if undetermined
57
- def conditional_dominance_pairwise(i,j)
58
- dm=dominance_for_nil_model(i,j)
59
- return 0.5 if dm==0.5
60
- dominances=[dm]
61
- for k in 1...@fields.size
62
- a=average_k(k)
63
- if a[i]>a[j]
64
- dominances.push(1)
65
- elsif a[i]<a[j]
66
- dominances.push(0)
67
- else
68
- return 0.5
69
- dominances.push(0.5)
70
- end
71
- end
72
- final=dominances.uniq
73
- final.size>1 ? 0.5 : final[0]
3
+ # Dominance Analysis is a procedure based on an examination of the R2 values
4
+ # for all possible subset models, to identify the relevance of one or more
5
+ # predictors in the prediction of the criterion.
6
+ #
7
+ # See Budescu(1993) and Azen & Budescu (2003) for more information.
8
+ # Use:
9
+ # a=1000.times.collect {rand}.to_scale
10
+ # b=1000.times.collect {rand}.to_scale
11
+ # c=1000.times.collect {rand}.to_scale
12
+ # ds={'a'=>a,'b'=>b,'c'=>c}.to_dataset
13
+ # ds['y']=ds.collect{|row| row['a']*5+row['b']*3+row['c']*2+rand()}
14
+ # da=Statsample::DominanceAnalysis.new(ds,'y')
15
+ # puts da.summary
16
+ # ==>
17
+ # Resultado del Analisis de Dominancia de a, b, c en y
18
+ #
19
+ # ----------------------------------------------------------------
20
+ # | | r2 | sign | a | b | c |
21
+ # ----------------------------------------------------------------
22
+ # | Modelo 0 | | | 0.637 | 0.260 | 0.115 |
23
+ # ----------------------------------------------------------------
24
+ # | a | 0.637 | 0.000 | -- | 0.239 | 0.109 |
25
+ # | b | 0.260 | 0.000 | 0.617 | -- | 0.103 |
26
+ # | c | 0.115 | 0.000 | 0.632 | 0.249 | -- |
27
+ # ----------------------------------------------------------------
28
+ # | k=1 Promedio | | | 0.624 | 0.244 | 0.106 |
29
+ # ----------------------------------------------------------------
30
+ # | a*b | 0.877 | 0.000 | -- | -- | 0.098 |
31
+ # | a*c | 0.746 | 0.000 | -- | 0.229 | -- |
32
+ # | b*c | 0.363 | 0.000 | 0.612 | -- | -- |
33
+ # ----------------------------------------------------------------
34
+ # | k=2 Promedio | | | 0.612 | 0.229 | 0.098 |
35
+ # ----------------------------------------------------------------
36
+ # | a*b*c | 0.975 | 0.000 | -- | -- | -- |
37
+ # ----------------------------------------------------------------
38
+ # | Promedios generales | | | 0.624 | 0.244 | 0.106 |
39
+ # ----------------------------------------------------------------
40
+ #
41
+ # De a pares
42
+ #
43
+ # ----------------------------
44
+ # | Pares | T | C | G |
45
+ # ----------------------------
46
+ # | a - b | 1.0 | 1.0 | 1.0 |
47
+ # | a - c | 1.0 | 1.0 | 1.0 |
48
+ # | b - c | 1.0 | 1.0 | 1.0 |
49
+ # ----------------------------
50
+ #
51
+ # == References:
52
+ # * Budescu, D. V. (1993). Dominance analysis: a new approach to the problem of relative importance of predictors in multiple regression. _Psychological Bulletin, 114_, 542-551.
53
+ # * Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. _Psychological Methods, 8_(2), 129-148.
54
+ class DominanceAnalysis
55
+ include GetText
56
+ bindtextdomain("statsample")
57
+ # Creates a new DominanceAnalysis object
58
+ # Params:
59
+ # * ds: A Dataset object
60
+ # * y_var: Name of dependent variable
61
+ # * r_class: Class to generate the regressions. Could be any subclass of
62
+ # Statsample::Regression::Multiple::BaseEngine
63
+ #
64
+ def initialize(ds,y_var, r_class = Regression::Multiple::RubyEngine)
65
+ @y_var=y_var
66
+ @dy=ds[@y_var]
67
+ @ds=ds
68
+ @r_class=r_class
69
+ @ds_indep=ds.dup(ds.fields-[y_var])
70
+ @fields=@ds_indep.fields
71
+ create_models
72
+ fill_models
73
+ end
74
+ def fill_models
75
+ @models.each do |m|
76
+ @fields.each do |f|
77
+ next if m.include? f
78
+ base_model=md(m)
79
+ comp_model=md(m+[f])
80
+ base_model.add_contribution(f,comp_model.r2)
74
81
  end
75
- # Returns 1 if i gD k, 0 if j gD i and 0.5 if undetermined
76
- def general_dominance_pairwise(i,j)
77
- ga=general_averages
78
- if ga[i]>ga[j]
79
- 1
80
- elsif ga[i]<ga[j]
81
- 0
82
+ end
83
+ end
84
+ def dominance_for_nil_model(i,j)
85
+ if md([i]).r2>md([j]).r2
86
+ 1
87
+ elsif md([i]).r2<md([j]).r2
88
+ 0
89
+ else
90
+ 0.5
91
+ end
92
+ end
93
+ # Returns 1 if i D j, 0 if j D i and 0.5 if undetermined
94
+ def total_dominance_pairwise(i,j)
95
+ dm=dominance_for_nil_model(i,j)
96
+ return 0.5 if dm==0.5
97
+ dominances=[dm]
98
+ @models_data.each do |k,m|
99
+ if !m.contributions[i].nil? and !m.contributions[j].nil?
100
+ if m.contributions[i]>m.contributions[j]
101
+ dominances.push(1)
102
+ elsif m.contributions[i]<m.contributions[j]
103
+ dominances.push(0)
82
104
  else
83
- 0.5
84
- end
85
- end
86
- def pairs
87
- @models.find_all{|m| m.size==2}
88
- end
89
- def total_dominance
90
- pairs.inject({}){|a,pair|
91
- a[pair]=total_dominance_pairwise(pair[0], pair[1])
92
- a
93
- }
94
- end
95
- def conditional_dominance
96
- pairs.inject({}){|a,pair|
97
- a[pair]=conditional_dominance_pairwise(pair[0], pair[1])
98
- a
99
- }
100
- end
101
- def general_dominance
102
- pairs.inject({}){|a,pair|
103
- a[pair]=general_dominance_pairwise(pair[0], pair[1])
104
- a
105
- }
106
- end
107
-
108
- def md(m)
109
- @models_data[m.sort]
110
- end
111
- # Get all model of size k
112
- def md_k(k)
113
- out=[]
114
- models=@models.each{|m| out.push(md(m)) if m.size==k }
115
- out
116
- end
117
-
118
- # For a hash with arrays of numbers as values
119
- # Returns a hash with same keys and
120
- # value as the mean of values of original hash
121
-
122
- def get_averages(averages)
123
- out={}
124
- averages.each{|key,val| out[key]=val.to_vector(:scale).mean }
125
- out
126
- end
127
- # Hash with average for each k size
128
- # model
129
- def average_k(k)
130
- return nil if k==@fields.size
131
- models=md_k(k)
132
- averages=@fields.inject({}) {|a,v| a[v]=[];a}
133
- models.each do |m|
134
- @fields.each do |f|
135
- averages[f].push(m.contributions[f]) unless m.contributions[f].nil?
136
- end
105
+ return 0.5
106
+ #dominances.push(0.5)
137
107
  end
138
- get_averages(averages)
139
108
  end
140
- def general_averages
141
- if @general_averages.nil?
142
- averages=@fields.inject({}) {|a,v| a[v]=[md(v).r2];a}
143
- for k in 1...@fields.size
144
- ak=average_k(k)
145
- @fields.each{|f|
146
- averages[f].push(ak[f])
147
- }
148
- end
149
- @general_averages=get_averages(averages)
150
- end
151
- @general_averages
109
+ end
110
+ final=dominances.uniq
111
+ final.size>1 ? 0.5 : final[0]
112
+ end
113
+
114
+ # Returns 1 if i cD j, 0 if j cD i and 0.5 if undetermined
115
+ def conditional_dominance_pairwise(i,j)
116
+ dm=dominance_for_nil_model(i,j)
117
+ return 0.5 if dm==0.5
118
+ dominances=[dm]
119
+ for k in 1...@fields.size
120
+ a=average_k(k)
121
+ if a[i]>a[j]
122
+ dominances.push(1)
123
+ elsif a[i]<a[j]
124
+ dominances.push(0)
125
+ else
126
+ return 0.5
127
+ dominances.push(0.5)
128
+ end
129
+ end
130
+ final=dominances.uniq
131
+ final.size>1 ? 0.5 : final[0]
132
+ end
133
+ # Returns 1 if i gD j, 0 if j gD i and 0.5 if undetermined
134
+ def general_dominance_pairwise(i,j)
135
+ ga=general_averages
136
+ if ga[i]>ga[j]
137
+ 1
138
+ elsif ga[i]<ga[j]
139
+ 0
140
+ else
141
+ 0.5
142
+ end
143
+ end
144
+ def pairs
145
+ @models.find_all{|m| m.size==2}
146
+ end
147
+ def total_dominance
148
+ pairs.inject({}){|a,pair| a[pair]=total_dominance_pairwise(pair[0], pair[1])
149
+ a
150
+ }
151
+ end
152
+ def conditional_dominance
153
+ pairs.inject({}){|a,pair|
154
+ a[pair]=conditional_dominance_pairwise(pair[0], pair[1])
155
+ a
156
+ }
157
+ end
158
+ def general_dominance
159
+ pairs.inject({}){|a,pair|
160
+ a[pair]=general_dominance_pairwise(pair[0], pair[1])
161
+ a
162
+ }
163
+ end
164
+
165
+ def md(m)
166
+ @models_data[m.sort]
167
+ end
168
+ # Get all models of size k
169
+ def md_k(k)
170
+ out=[]
171
+ models=@models.each{|m| out.push(md(m)) if m.size==k }
172
+ out
173
+ end
174
+
175
+ # For a hash with arrays of numbers as values
176
+ # Returns a hash with same keys and
177
+ # value as the mean of values of original hash
178
+
179
+ def get_averages(averages)
180
+ out={}
181
+ averages.each{|key,val| out[key]=val.to_vector(:scale).mean }
182
+ out
183
+ end
184
+ # Hash with average for each k size model.
185
+ def average_k(k)
186
+ return nil if k==@fields.size
187
+ models=md_k(k)
188
+ averages=@fields.inject({}) {|a,v| a[v]=[];a}
189
+ models.each do |m|
190
+ @fields.each do |f|
191
+ averages[f].push(m.contributions[f]) unless m.contributions[f].nil?
152
192
  end
153
- def create_models
154
- @models=[]
155
- @models_data={}
156
- for i in 1..@fields.size
157
- c=Statsample::Combination.new(i,@fields.size)
158
- c.each{|data|
159
- convert=data.collect {|i1|
160
- @fields[i1]
161
- }
162
- @models.push(convert)
163
- ds_prev=@ds.dup(convert+[@y_var])
164
- modeldata=ModelData.new(convert,ds_prev, @y_var, @fields, @r_class)
165
- @models_data[convert.sort]=modeldata
166
- }
193
+ end
194
+ get_averages(averages)
195
+ end
196
+ def general_averages
197
+ if @general_averages.nil?
198
+ averages=@fields.inject({}) {|a,v| a[v]=[md([v]).r2];a}
199
+ for k in 1...@fields.size
200
+ ak=average_k(k)
201
+ @fields.each do |f|
202
+ averages[f].push(ak[f])
167
203
  end
168
204
  end
169
- def summary(report_type=ConsoleSummary)
170
- out=""
171
- out.extend report_type
172
- out << _("Summary for Dominance Analysis of %s on %s\n") % [@fields.join(", "),@y_var]
173
- t=Statsample::ReportTable.new
174
- t.header=["","r2","sign"]+@fields
175
- row=[_("Model 0"),"",""]+@fields.collect{|f|
176
- sprintf("%0.3f",md(f).r2)
177
- }
178
- t.add_row(row)
179
- t.add_horizontal_line
180
- for i in 1..@fields.size
181
- mk=md_k(i)
182
- mk.each{|m|
183
- t.add_row(m.add_table_row)
184
- }
185
- # Report averages
186
- a=average_k(i)
187
- if !a.nil?
188
- t.add_horizontal_line
189
- row=[_("k=%d Average") % i,"",""] + @fields.collect{|f|
190
- sprintf("%0.3f",a[f])
191
- }
192
- t.add_row(row)
193
- t.add_horizontal_line
194
-
195
- end
196
-
197
- end
198
-
199
- g=general_averages
200
- t.add_horizontal_line
201
-
202
- row=[_("Overall averages"),"",""]+@fields.collect{|f|
203
- sprintf("%0.3f",g[f])
204
- }
205
- t.add_row(row)
206
- out.parse_table(t)
207
-
208
- out.nl
209
- out << _("Pairwise")+"\n"
210
- td=total_dominance
211
- cd=conditional_dominance
212
- gd=general_dominance
213
- t=Statsample::ReportTable.new([_("Pairs"),"T","C","G"])
214
- pairs.each{|p|
215
- name=p.join(" - ")
216
- row=[name, sprintf("%0.1f",td[p]), sprintf("%0.1f",cd[p]), sprintf("%0.1f",gd[p])]
217
- t.add_row(row)
218
- }
219
- out.parse_table(t)
220
- return out
221
- end
222
- class ModelData
223
- attr_reader :contributions
224
- def initialize(name,ds,y_var,fields,r_class)
225
- @name=name
226
- @fields=fields
227
- @contributions=@fields.inject({}){|a,v| a[v]=nil;a}
228
- r_class=Regression::Multiple::RubyEngine if r_class.nil?
229
- @lr=r_class.new(ds,y_var)
230
- end
231
- def add_contribution(f,v)
232
- @contributions[f]=v-r2
233
- end
234
- def r2
235
- @lr.r2
236
- end
237
- def add_table_row
238
- begin
239
- sign=sprintf("%0.3f", @lr.significance)
240
- rescue RuntimeError
241
- sign="???"
242
- end
243
- [@name.join("*"), sprintf("%0.3f",r2), sign] + @fields.collect{|k|
244
- v=@contributions[k]
245
- if v.nil?
246
- "--"
247
- else
248
- sprintf("%0.3f",v)
249
- end
250
- }
251
- end
252
- def summary
253
- out=sprintf("%s: r2=%0.3f(p=%0.2f)\n",@name.join("*"),r2,@lr.significance,@lr.sst)
254
- out << @fields.collect{|k|
255
- v=@contributions[k]
256
- if v.nil?
257
- "--"
258
- else
259
- sprintf("%s=%0.3f",k,v)
260
- end
261
- }.join(" | ")
262
- out << "\n"
263
-
264
- return out
265
- end
266
- end
205
+ @general_averages=get_averages(averages)
206
+ end
207
+ @general_averages
208
+ end
209
+ def create_models
210
+ @models=[]
211
+ @models_data={}
212
+ for i in 1..@fields.size
213
+ c=Statsample::Combination.new(i,@fields.size)
214
+ c.each do |data|
215
+ convert=data.collect {|i1| @fields[i1] }
216
+ @models.push(convert)
217
+ ds_prev=@ds.dup(convert+[@y_var])
218
+ modeldata=ModelData.new(convert,ds_prev, @y_var, @fields, @r_class)
219
+ @models_data[convert.sort]=modeldata
220
+ end
221
+ end
222
+ end
223
+ def summary(report_type=ConsoleSummary)
224
+ out=""
225
+ out.extend report_type
226
+ out << _("Summary for Dominance Analysis of %s on %s\n") % [@fields.join(", "),@y_var]
227
+ t=Statsample::ReportTable.new
228
+ t.header=["","r2","sign"]+@fields
229
+ row=[_("Model 0"),"",""]+@fields.collect{|f|
230
+ sprintf("%0.3f", md([f]).r2)
231
+ }
232
+ t.add_row(row)
233
+ t.add_horizontal_line
234
+ for i in 1..@fields.size
235
+ mk=md_k(i)
236
+ mk.each{|m|
237
+ t.add_row(m.add_table_row)
238
+ }
239
+ # Report averages
240
+ a=average_k(i)
241
+ if !a.nil?
242
+ t.add_horizontal_line
243
+ row=[_("k=%d Average") % i,"",""] + @fields.collect{|f|
244
+ sprintf("%0.3f",a[f])
245
+ }
246
+ t.add_row(row)
247
+ t.add_horizontal_line
248
+
249
+ end
250
+
267
251
  end
268
252
 
253
+ g=general_averages
254
+ t.add_horizontal_line
255
+
256
+ row=[_("Overall averages"),"",""]+@fields.collect{|f|
257
+ sprintf("%0.3f",g[f])
258
+ }
259
+ t.add_row(row)
260
+ out.parse_table(t)
261
+
262
+ out.nl
263
+ out << _("Pairwise")+"\n"
264
+ td=total_dominance
265
+ cd=conditional_dominance
266
+ gd=general_dominance
267
+ t=Statsample::ReportTable.new([_("Pairs"),"T","C","G"])
268
+ pairs.each{|p|
269
+ name=p.join(" - ")
270
+ row=[name, sprintf("%0.1f",td[p]), sprintf("%0.1f",cd[p]), sprintf("%0.1f",gd[p])]
271
+ t.add_row(row)
272
+ }
273
+ out.parse_table(t)
274
+ return out
275
+ end
276
+ class ModelData
277
+ attr_reader :contributions
278
+ def initialize(name,ds,y_var,fields,r_class)
279
+ @name=name
280
+ @fields=fields
281
+ @contributions=@fields.inject({}){|a,v| a[v]=nil;a}
282
+ r_class=Regression::Multiple::RubyEngine if r_class.nil?
283
+ @lr=r_class.new(ds,y_var)
284
+ end
285
+ def add_contribution(f,v)
286
+ @contributions[f]=v-r2
287
+ end
288
+ def r2
289
+ @lr.r2
290
+ end
291
+ def add_table_row
292
+ begin
293
+ sign=sprintf("%0.3f", @lr.significance)
294
+ rescue RuntimeError
295
+ sign="???"
296
+ end
297
+ [@name.join("*"), sprintf("%0.3f",r2), sign] + @fields.collect{|k|
298
+ v=@contributions[k]
299
+ if v.nil?
300
+ "--"
301
+ else
302
+ sprintf("%0.3f",v)
303
+ end
304
+ }
305
+ end
306
+ def summary
307
+ out=sprintf("%s: r2=%0.3f(p=%0.2f)\n",@name.join("*"),r2,@lr.significance,@lr.sst)
308
+ out << @fields.collect{|k|
309
+ v=@contributions[k]
310
+ if v.nil?
311
+ "--"
312
+ else
313
+ sprintf("%s=%0.3f",k,v)
314
+ end
315
+ }.join(" | ")
316
+ out << "\n"
317
+ return out
318
+ end
319
+ end # end ModelData
320
+ end # end Dominance Analysis
269
321
  end
@@ -1,5 +1,8 @@
1
1
  module Statsample
2
2
  class DominanceAnalysis
3
+ # Generates Bootstrap samples to identify the replicability of a Dominance Analysis. See Azen & Budescu (2003) for more information.
4
+ # References:
5
+ # * Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. _Psychological Methods, 8_(2), 129-148.
3
6
  class Bootstrap
4
7
  include GetText
5
8
  include Writable
@@ -3,18 +3,44 @@ module Statsample
3
3
  module Regression
4
4
  # Module for Linear Multiple Regression Analysis.
5
5
  #
6
- # You can call Regression::Multiple.listwise or Regression::Multiple.pairwise or instance directly the engines.
6
+ # You can call Statsample::Regression::Multiple.listwise, Statsample::Regression::Multiple.pairwise or instantiate the engines directly.
7
7
  #
8
- # Example.
8
+ # Use:.
9
9
  #
10
10
  # require 'statsample'
11
- # @a=[1,3,2,4,3,5,4,6,5,7].to_vector(:scale)
12
- # @b=[3,3,4,4,5,5,6,6,4,4].to_vector(:scale)
13
- # @c=[11,22,30,40,50,65,78,79,99,100].to_vector(:scale)
14
- # @y=[3,4,5,6,7,8,9,10,20,30].to_vector(:scale)
15
- # ds={'a'=>@a,'b'=>@b,'c'=>@c,'y'=>@y}.to_dataset
16
- # lr=Statsample::Regression::Multiple.listwise(ds,'y')
17
- # #<Statsample::Regression::Multiple::AlglibEngine:0x7f21912e4758 @ds_valid=#<Statsample::Dataset:69891073182680 @fields=[a,b,c,y] labels={"a"=>nil, "b"=>nil, "y"=>nil, "c"=>nil} cases=10, @lr=#<Alglib::LinearRegression:0x7f21912df118 @model=#<Alglib_ext::LinearModel:0x7f21912df708>, @ivars=3, @cases=10, @report=#<Alglib_ext::LrReport:0x7f21912df168>>, @y_var="y", @ds=#<Statsample::Dataset:69891073182680 @fields=[a,b,c,y] labels={"a"=>nil, "b"=>nil, "y"=>nil, "c"=>nil} cases=10, @fields=["a", "b", "c"], @lr_s=nil, @dep_columns=[[1, 3, 2, 4, 3, 5, 4, 6, 5, 7], [3, 3, 4, 4, 5, 5, 6, 6, 4, 4], [11, 22, 30, 40, 50, 65, 78, 79, 99, 100]], @ds_indep=#<Statsample::Dataset:69891073180060 @fields=[a,b,c] labels={"a"=>nil, "b"=>nil, "c"=>nil} cases=10, @dy=Vector(type:scale, n:10)[3,4,5,6,7,8,9,10,20,30]>
11
+ # a=1000.times.collect {rand}.to_scale
12
+ # b=1000.times.collect {rand}.to_scale
13
+ # c=1000.times.collect {rand}.to_scale
14
+ # ds={'a'=>a,'b'=>b,'c'=>c}.to_dataset
15
+ # ds['y']=ds.collect{|row| row['a']*5+row['b']*3+row['c']*2+rand()}
16
+ # lr=Statsample::Regression::Multiple.listwise(ds,'y')
17
+ # puts lr.summary
18
+ # Summary for regression of a,b,c over y
19
+ # *************************************************************
20
+ # Engine: Statsample::Regression::Multiple::AlglibEngine
21
+ # Cases(listwise)=1000(1000)
22
+ # r=0.986
23
+ # r2=0.973
24
+ # Equation=0.504+5.011a + 2.995b + 1.988c
25
+ # ----------------------------
26
+ # ANOVA TABLE
27
+ # --------------------------------------------------------------
28
+ # | source | ss | df | ms | f | s |
29
+ # --------------------------------------------------------------
30
+ # | Regression | 2979.321 | 3 | 993.107 | 12040.067 | 0.000 |
31
+ # | Error | 82.154 | 996 | 0.082 | | |
32
+ # | Total | 3061.475 | 999 | | | |
33
+ # --------------------------------------------------------------
34
+ # Beta coefficientes
35
+ # -----------------------------------------------
36
+ # | coeff | b | beta | se | t |
37
+ # -----------------------------------------------
38
+ # | Constant | 0.504 | - | 0.030 | 16.968 |
39
+ # | a | 5.011 | 0.832 | 0.031 | 159.486 |
40
+ # | b | 2.995 | 0.492 | 0.032 | 94.367 |
41
+ # | c | 1.988 | 0.323 | 0.032 | 62.132 |
42
+ # -----------------------------------------------
43
+ #
18
44
  module Multiple
19
45
  # Creates an object for listwise regression.
20
46
  # Alglib is faster, so it is preferred over GSL
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: statsample
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.6.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Claudio Bustos
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-02-06 00:00:00 -03:00
12
+ date: 2010-02-08 00:00:00 -03:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency