statsample 0.6.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,6 @@
1
+ === 0.6.1 / 2010-02-08
2
+ * Bug fix on DominanceAnalysis summary for Ruby1.9
3
+ * Some extra documentation
1
4
  === 0.6.0 / 2010-02-05
2
5
  * New Statsample::Factor module. Include classes for extracting factors (Statsample::Factor::PCA and Statsample::Factor::PrincipalAxis) and rotate component matrix ( Statsample::Factor::Rotation subclasses). For now, only orthogonal rotations
3
6
  * New Statsample::Dataset.crosstab_with_asignation, Statsample::Dataset.one_to_many
@@ -108,7 +108,7 @@ end
108
108
  # * Dataset: An union of vectors.
109
109
  #
110
110
  module Statsample
111
- VERSION = '0.6.0'
111
+ VERSION = '0.6.1'
112
112
  SPLIT_TOKEN = ","
113
113
  autoload(:Database, 'statsample/converters')
114
114
  autoload(:Anova, 'statsample/anova')
@@ -27,11 +27,11 @@ module Statsample
27
27
  # See http://www.john-uebersax.com/stat/tetra.htm for extensive
28
28
  # documentation about tetrachoric correlation.
29
29
  #
30
- # This class uses algorithm AS116 from Applied Statistics(1977)
31
- # vol.26, no.3.
32
- #
33
- # You can see FORTRAN code on http://lib.stat.cmu.edu/apstat/116
30
+ # This class uses Brown(1977) algorithm. You can see FORTRAN code on http://lib.stat.cmu.edu/apstat/116
34
31
  #
32
+ # == References:
33
+ # * Brown, MB. (1977) Algorithm AS 116: the tetrachoric correlation and its standard error. _Applied Statistics, 26_, 343-351.
34
+ #
35
35
  # <b>Usage</b>.
36
36
  # With two variables x and y on a crosstab like this:
37
37
  #
@@ -1,269 +1,321 @@
1
1
  require 'statsample/dominanceanalysis/bootstrap'
2
2
  module Statsample
3
- class DominanceAnalysis
4
- include GetText
5
- bindtextdomain("statsample")
6
- def initialize(ds,y_var, r_class = Regression::Multiple::RubyEngine)
7
- @y_var=y_var
8
- @dy=ds[@y_var]
9
- @ds=ds
10
- @r_class=r_class
11
- @ds_indep=ds.dup(ds.fields-[y_var])
12
- @fields=@ds_indep.fields
13
- create_models
14
- fill_models
15
- end
16
- def fill_models
17
- @models.each{|m|
18
- @fields.each{|f|
19
- next if m.include? f
20
- base_model=md(m)
21
- comp_model=md(m+[f])
22
- base_model.add_contribution(f,comp_model.r2)
23
- }
24
- }
25
- end
26
- def dominance_for_nil_model(i,j)
27
- if md(i).r2>md(j).r2
28
- 1
29
- elsif md(i).r2<md(j).r2
30
- 0
31
- else
32
- 0.5
33
- end
34
- end
35
- # Returns 1 if i D k, 0 if j dominates i and 0.5 if undetermined
36
- def total_dominance_pairwise(i,j)
37
- dm=dominance_for_nil_model(i,j)
38
- return 0.5 if dm==0.5
39
- dominances=[dm]
40
- @models_data.each{|k,m|
41
- if !m.contributions[i].nil? and !m.contributions[j].nil?
42
- if m.contributions[i]>m.contributions[j]
43
- dominances.push(1)
44
- elsif m.contributions[i]<m.contributions[j]
45
- dominances.push(0)
46
- else
47
- return 0.5
48
- #dominances.push(0.5)
49
- end
50
- end
51
- }
52
- final=dominances.uniq
53
- final.size>1 ? 0.5 : final[0]
54
- end
55
-
56
- # Returns 1 if i cD k, 0 if j cD i and 0.5 if undetermined
57
- def conditional_dominance_pairwise(i,j)
58
- dm=dominance_for_nil_model(i,j)
59
- return 0.5 if dm==0.5
60
- dominances=[dm]
61
- for k in 1...@fields.size
62
- a=average_k(k)
63
- if a[i]>a[j]
64
- dominances.push(1)
65
- elsif a[i]<a[j]
66
- dominances.push(0)
67
- else
68
- return 0.5
69
- dominances.push(0.5)
70
- end
71
- end
72
- final=dominances.uniq
73
- final.size>1 ? 0.5 : final[0]
3
+ # Dominance Analysis is a procedure based on an examination of the R2 values
4
+ # for all possible subset models, to identify the relevance of one or more
5
+ # predictors in the prediction of the criterion.
6
+ #
7
+ # See Budescu(1993) and Azen & Budescu (2003) for more information.
8
+ # Use:
9
+ # a=1000.times.collect {rand}.to_scale
10
+ # b=1000.times.collect {rand}.to_scale
11
+ # c=1000.times.collect {rand}.to_scale
12
+ # ds={'a'=>a,'b'=>b,'c'=>c}.to_dataset
13
+ # ds['y']=ds.collect{|row| row['a']*5+row['b']*3+row['c']*2+rand()}
14
+ # da=Statsample::DominanceAnalysis.new(ds,'y')
15
+ # puts da.summary
16
+ # ==>
17
+ # Resultado del Analisis de Dominancia de a, b, c en y
18
+ #
19
+ # ----------------------------------------------------------------
20
+ # | | r2 | sign | a | b | c |
21
+ # ----------------------------------------------------------------
22
+ # | Modelo 0 | | | 0.637 | 0.260 | 0.115 |
23
+ # ----------------------------------------------------------------
24
+ # | a | 0.637 | 0.000 | -- | 0.239 | 0.109 |
25
+ # | b | 0.260 | 0.000 | 0.617 | -- | 0.103 |
26
+ # | c | 0.115 | 0.000 | 0.632 | 0.249 | -- |
27
+ # ----------------------------------------------------------------
28
+ # | k=1 Promedio | | | 0.624 | 0.244 | 0.106 |
29
+ # ----------------------------------------------------------------
30
+ # | a*b | 0.877 | 0.000 | -- | -- | 0.098 |
31
+ # | a*c | 0.746 | 0.000 | -- | 0.229 | -- |
32
+ # | b*c | 0.363 | 0.000 | 0.612 | -- | -- |
33
+ # ----------------------------------------------------------------
34
+ # | k=2 Promedio | | | 0.612 | 0.229 | 0.098 |
35
+ # ----------------------------------------------------------------
36
+ # | a*b*c | 0.975 | 0.000 | -- | -- | -- |
37
+ # ----------------------------------------------------------------
38
+ # | Promedios generales | | | 0.624 | 0.244 | 0.106 |
39
+ # ----------------------------------------------------------------
40
+ #
41
+ # De a pares
42
+ #
43
+ # ----------------------------
44
+ # | Pares | T | C | G |
45
+ # ----------------------------
46
+ # | a - b | 1.0 | 1.0 | 1.0 |
47
+ # | a - c | 1.0 | 1.0 | 1.0 |
48
+ # | b - c | 1.0 | 1.0 | 1.0 |
49
+ # ----------------------------
50
+ #
51
+ # == References:
52
+ # * Budescu, D. V. (1993). Dominance analysis: a new approach to the problem of relative importance of predictors in multiple regression. _Psychological Bulletin, 114_, 542-551.
53
+ # * Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. _Psychological Methods, 8_(2), 129-148.
54
+ class DominanceAnalysis
55
+ include GetText
56
+ bindtextdomain("statsample")
57
+ # Creates a new DominanceAnalysis object
58
+ # Params:
59
+ # * ds: A Dataset object
60
+ # * y_var: Name of dependent variable
61
+ # * r_class: Class to generate the regressions. Could be any subclass of
62
+ # Statsample::Regression::Multiple::BaseEngine
63
+ #
64
+ def initialize(ds,y_var, r_class = Regression::Multiple::RubyEngine)
65
+ @y_var=y_var
66
+ @dy=ds[@y_var]
67
+ @ds=ds
68
+ @r_class=r_class
69
+ @ds_indep=ds.dup(ds.fields-[y_var])
70
+ @fields=@ds_indep.fields
71
+ create_models
72
+ fill_models
73
+ end
74
+ def fill_models
75
+ @models.each do |m|
76
+ @fields.each do |f|
77
+ next if m.include? f
78
+ base_model=md(m)
79
+ comp_model=md(m+[f])
80
+ base_model.add_contribution(f,comp_model.r2)
74
81
  end
75
- # Returns 1 if i gD k, 0 if j gD i and 0.5 if undetermined
76
- def general_dominance_pairwise(i,j)
77
- ga=general_averages
78
- if ga[i]>ga[j]
79
- 1
80
- elsif ga[i]<ga[j]
81
- 0
82
+ end
83
+ end
84
+ def dominance_for_nil_model(i,j)
85
+ if md([i]).r2>md([j]).r2
86
+ 1
87
+ elsif md([i]).r2<md([j]).r2
88
+ 0
89
+ else
90
+ 0.5
91
+ end
92
+ end
93
+ # Returns 1 if i D j, 0 if j D i and 0.5 if undetermined
94
+ def total_dominance_pairwise(i,j)
95
+ dm=dominance_for_nil_model(i,j)
96
+ return 0.5 if dm==0.5
97
+ dominances=[dm]
98
+ @models_data.each do |k,m|
99
+ if !m.contributions[i].nil? and !m.contributions[j].nil?
100
+ if m.contributions[i]>m.contributions[j]
101
+ dominances.push(1)
102
+ elsif m.contributions[i]<m.contributions[j]
103
+ dominances.push(0)
82
104
  else
83
- 0.5
84
- end
85
- end
86
- def pairs
87
- @models.find_all{|m| m.size==2}
88
- end
89
- def total_dominance
90
- pairs.inject({}){|a,pair|
91
- a[pair]=total_dominance_pairwise(pair[0], pair[1])
92
- a
93
- }
94
- end
95
- def conditional_dominance
96
- pairs.inject({}){|a,pair|
97
- a[pair]=conditional_dominance_pairwise(pair[0], pair[1])
98
- a
99
- }
100
- end
101
- def general_dominance
102
- pairs.inject({}){|a,pair|
103
- a[pair]=general_dominance_pairwise(pair[0], pair[1])
104
- a
105
- }
106
- end
107
-
108
- def md(m)
109
- @models_data[m.sort]
110
- end
111
- # Get all model of size k
112
- def md_k(k)
113
- out=[]
114
- models=@models.each{|m| out.push(md(m)) if m.size==k }
115
- out
116
- end
117
-
118
- # For a hash with arrays of numbers as values
119
- # Returns a hash with same keys and
120
- # value as the mean of values of original hash
121
-
122
- def get_averages(averages)
123
- out={}
124
- averages.each{|key,val| out[key]=val.to_vector(:scale).mean }
125
- out
126
- end
127
- # Hash with average for each k size
128
- # model
129
- def average_k(k)
130
- return nil if k==@fields.size
131
- models=md_k(k)
132
- averages=@fields.inject({}) {|a,v| a[v]=[];a}
133
- models.each do |m|
134
- @fields.each do |f|
135
- averages[f].push(m.contributions[f]) unless m.contributions[f].nil?
136
- end
105
+ return 0.5
106
+ #dominances.push(0.5)
137
107
  end
138
- get_averages(averages)
139
108
  end
140
- def general_averages
141
- if @general_averages.nil?
142
- averages=@fields.inject({}) {|a,v| a[v]=[md(v).r2];a}
143
- for k in 1...@fields.size
144
- ak=average_k(k)
145
- @fields.each{|f|
146
- averages[f].push(ak[f])
147
- }
148
- end
149
- @general_averages=get_averages(averages)
150
- end
151
- @general_averages
109
+ end
110
+ final=dominances.uniq
111
+ final.size>1 ? 0.5 : final[0]
112
+ end
113
+
114
+ # Returns 1 if i cD j, 0 if j cD i and 0.5 if undetermined
115
+ def conditional_dominance_pairwise(i,j)
116
+ dm=dominance_for_nil_model(i,j)
117
+ return 0.5 if dm==0.5
118
+ dominances=[dm]
119
+ for k in 1...@fields.size
120
+ a=average_k(k)
121
+ if a[i]>a[j]
122
+ dominances.push(1)
123
+ elsif a[i]<a[j]
124
+ dominances.push(0)
125
+ else
126
+ return 0.5
127
+ dominances.push(0.5)
128
+ end
129
+ end
130
+ final=dominances.uniq
131
+ final.size>1 ? 0.5 : final[0]
132
+ end
133
+ # Returns 1 if i gD j, 0 if j gD i and 0.5 if undetermined
134
+ def general_dominance_pairwise(i,j)
135
+ ga=general_averages
136
+ if ga[i]>ga[j]
137
+ 1
138
+ elsif ga[i]<ga[j]
139
+ 0
140
+ else
141
+ 0.5
142
+ end
143
+ end
144
+ def pairs
145
+ @models.find_all{|m| m.size==2}
146
+ end
147
+ def total_dominance
148
+ pairs.inject({}){|a,pair| a[pair]=total_dominance_pairwise(pair[0], pair[1])
149
+ a
150
+ }
151
+ end
152
+ def conditional_dominance
153
+ pairs.inject({}){|a,pair|
154
+ a[pair]=conditional_dominance_pairwise(pair[0], pair[1])
155
+ a
156
+ }
157
+ end
158
+ def general_dominance
159
+ pairs.inject({}){|a,pair|
160
+ a[pair]=general_dominance_pairwise(pair[0], pair[1])
161
+ a
162
+ }
163
+ end
164
+
165
+ def md(m)
166
+ @models_data[m.sort]
167
+ end
168
+ # Get all model of size k
169
+ def md_k(k)
170
+ out=[]
171
+ models=@models.each{|m| out.push(md(m)) if m.size==k }
172
+ out
173
+ end
174
+
175
+ # For a hash with arrays of numbers as values
176
+ # Returns a hash with same keys and
177
+ # value as the mean of values of original hash
178
+
179
+ def get_averages(averages)
180
+ out={}
181
+ averages.each{|key,val| out[key]=val.to_vector(:scale).mean }
182
+ out
183
+ end
184
+ # Hash with average for each k size model.
185
+ def average_k(k)
186
+ return nil if k==@fields.size
187
+ models=md_k(k)
188
+ averages=@fields.inject({}) {|a,v| a[v]=[];a}
189
+ models.each do |m|
190
+ @fields.each do |f|
191
+ averages[f].push(m.contributions[f]) unless m.contributions[f].nil?
152
192
  end
153
- def create_models
154
- @models=[]
155
- @models_data={}
156
- for i in 1..@fields.size
157
- c=Statsample::Combination.new(i,@fields.size)
158
- c.each{|data|
159
- convert=data.collect {|i1|
160
- @fields[i1]
161
- }
162
- @models.push(convert)
163
- ds_prev=@ds.dup(convert+[@y_var])
164
- modeldata=ModelData.new(convert,ds_prev, @y_var, @fields, @r_class)
165
- @models_data[convert.sort]=modeldata
166
- }
193
+ end
194
+ get_averages(averages)
195
+ end
196
+ def general_averages
197
+ if @general_averages.nil?
198
+ averages=@fields.inject({}) {|a,v| a[v]=[md([v]).r2];a}
199
+ for k in 1...@fields.size
200
+ ak=average_k(k)
201
+ @fields.each do |f|
202
+ averages[f].push(ak[f])
167
203
  end
168
204
  end
169
- def summary(report_type=ConsoleSummary)
170
- out=""
171
- out.extend report_type
172
- out << _("Summary for Dominance Analysis of %s on %s\n") % [@fields.join(", "),@y_var]
173
- t=Statsample::ReportTable.new
174
- t.header=["","r2","sign"]+@fields
175
- row=[_("Model 0"),"",""]+@fields.collect{|f|
176
- sprintf("%0.3f",md(f).r2)
177
- }
178
- t.add_row(row)
179
- t.add_horizontal_line
180
- for i in 1..@fields.size
181
- mk=md_k(i)
182
- mk.each{|m|
183
- t.add_row(m.add_table_row)
184
- }
185
- # Report averages
186
- a=average_k(i)
187
- if !a.nil?
188
- t.add_horizontal_line
189
- row=[_("k=%d Average") % i,"",""] + @fields.collect{|f|
190
- sprintf("%0.3f",a[f])
191
- }
192
- t.add_row(row)
193
- t.add_horizontal_line
194
-
195
- end
196
-
197
- end
198
-
199
- g=general_averages
200
- t.add_horizontal_line
201
-
202
- row=[_("Overall averages"),"",""]+@fields.collect{|f|
203
- sprintf("%0.3f",g[f])
204
- }
205
- t.add_row(row)
206
- out.parse_table(t)
207
-
208
- out.nl
209
- out << _("Pairwise")+"\n"
210
- td=total_dominance
211
- cd=conditional_dominance
212
- gd=general_dominance
213
- t=Statsample::ReportTable.new([_("Pairs"),"T","C","G"])
214
- pairs.each{|p|
215
- name=p.join(" - ")
216
- row=[name, sprintf("%0.1f",td[p]), sprintf("%0.1f",cd[p]), sprintf("%0.1f",gd[p])]
217
- t.add_row(row)
218
- }
219
- out.parse_table(t)
220
- return out
221
- end
222
- class ModelData
223
- attr_reader :contributions
224
- def initialize(name,ds,y_var,fields,r_class)
225
- @name=name
226
- @fields=fields
227
- @contributions=@fields.inject({}){|a,v| a[v]=nil;a}
228
- r_class=Regression::Multiple::RubyEngine if r_class.nil?
229
- @lr=r_class.new(ds,y_var)
230
- end
231
- def add_contribution(f,v)
232
- @contributions[f]=v-r2
233
- end
234
- def r2
235
- @lr.r2
236
- end
237
- def add_table_row
238
- begin
239
- sign=sprintf("%0.3f", @lr.significance)
240
- rescue RuntimeError
241
- sign="???"
242
- end
243
- [@name.join("*"), sprintf("%0.3f",r2), sign] + @fields.collect{|k|
244
- v=@contributions[k]
245
- if v.nil?
246
- "--"
247
- else
248
- sprintf("%0.3f",v)
249
- end
250
- }
251
- end
252
- def summary
253
- out=sprintf("%s: r2=%0.3f(p=%0.2f)\n",@name.join("*"),r2,@lr.significance,@lr.sst)
254
- out << @fields.collect{|k|
255
- v=@contributions[k]
256
- if v.nil?
257
- "--"
258
- else
259
- sprintf("%s=%0.3f",k,v)
260
- end
261
- }.join(" | ")
262
- out << "\n"
263
-
264
- return out
265
- end
266
- end
205
+ @general_averages=get_averages(averages)
206
+ end
207
+ @general_averages
208
+ end
209
+ def create_models
210
+ @models=[]
211
+ @models_data={}
212
+ for i in 1..@fields.size
213
+ c=Statsample::Combination.new(i,@fields.size)
214
+ c.each do |data|
215
+ convert=data.collect {|i1| @fields[i1] }
216
+ @models.push(convert)
217
+ ds_prev=@ds.dup(convert+[@y_var])
218
+ modeldata=ModelData.new(convert,ds_prev, @y_var, @fields, @r_class)
219
+ @models_data[convert.sort]=modeldata
220
+ end
221
+ end
222
+ end
223
+ def summary(report_type=ConsoleSummary)
224
+ out=""
225
+ out.extend report_type
226
+ out << _("Summary for Dominance Analysis of %s on %s\n") % [@fields.join(", "),@y_var]
227
+ t=Statsample::ReportTable.new
228
+ t.header=["","r2","sign"]+@fields
229
+ row=[_("Model 0"),"",""]+@fields.collect{|f|
230
+ sprintf("%0.3f", md([f]).r2)
231
+ }
232
+ t.add_row(row)
233
+ t.add_horizontal_line
234
+ for i in 1..@fields.size
235
+ mk=md_k(i)
236
+ mk.each{|m|
237
+ t.add_row(m.add_table_row)
238
+ }
239
+ # Report averages
240
+ a=average_k(i)
241
+ if !a.nil?
242
+ t.add_horizontal_line
243
+ row=[_("k=%d Average") % i,"",""] + @fields.collect{|f|
244
+ sprintf("%0.3f",a[f])
245
+ }
246
+ t.add_row(row)
247
+ t.add_horizontal_line
248
+
249
+ end
250
+
267
251
  end
268
252
 
253
+ g=general_averages
254
+ t.add_horizontal_line
255
+
256
+ row=[_("Overall averages"),"",""]+@fields.collect{|f|
257
+ sprintf("%0.3f",g[f])
258
+ }
259
+ t.add_row(row)
260
+ out.parse_table(t)
261
+
262
+ out.nl
263
+ out << _("Pairwise")+"\n"
264
+ td=total_dominance
265
+ cd=conditional_dominance
266
+ gd=general_dominance
267
+ t=Statsample::ReportTable.new([_("Pairs"),"T","C","G"])
268
+ pairs.each{|p|
269
+ name=p.join(" - ")
270
+ row=[name, sprintf("%0.1f",td[p]), sprintf("%0.1f",cd[p]), sprintf("%0.1f",gd[p])]
271
+ t.add_row(row)
272
+ }
273
+ out.parse_table(t)
274
+ return out
275
+ end
276
+ class ModelData
277
+ attr_reader :contributions
278
+ def initialize(name,ds,y_var,fields,r_class)
279
+ @name=name
280
+ @fields=fields
281
+ @contributions=@fields.inject({}){|a,v| a[v]=nil;a}
282
+ r_class=Regression::Multiple::RubyEngine if r_class.nil?
283
+ @lr=r_class.new(ds,y_var)
284
+ end
285
+ def add_contribution(f,v)
286
+ @contributions[f]=v-r2
287
+ end
288
+ def r2
289
+ @lr.r2
290
+ end
291
+ def add_table_row
292
+ begin
293
+ sign=sprintf("%0.3f", @lr.significance)
294
+ rescue RuntimeError
295
+ sign="???"
296
+ end
297
+ [@name.join("*"), sprintf("%0.3f",r2), sign] + @fields.collect{|k|
298
+ v=@contributions[k]
299
+ if v.nil?
300
+ "--"
301
+ else
302
+ sprintf("%0.3f",v)
303
+ end
304
+ }
305
+ end
306
+ def summary
307
+ out=sprintf("%s: r2=%0.3f(p=%0.2f)\n",@name.join("*"),r2,@lr.significance,@lr.sst)
308
+ out << @fields.collect{|k|
309
+ v=@contributions[k]
310
+ if v.nil?
311
+ "--"
312
+ else
313
+ sprintf("%s=%0.3f",k,v)
314
+ end
315
+ }.join(" | ")
316
+ out << "\n"
317
+ return out
318
+ end
319
+ end # end ModelData
320
+ end # end Dominance Analysis
269
321
  end
@@ -1,5 +1,8 @@
1
1
  module Statsample
2
2
  class DominanceAnalysis
3
+ # Generates Bootstrap samples to identify the replicability of a Dominance Analysis. See Azen & Budescu (2003) for more information.
4
+ # References:
5
+ # * Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. _Psychological Methods, 8_(2), 129-148.
3
6
  class Bootstrap
4
7
  include GetText
5
8
  include Writable
@@ -3,18 +3,44 @@ module Statsample
3
3
  module Regression
4
4
  # Module for Linear Multiple Regression Analysis.
5
5
  #
6
- # You can call Regression::Multiple.listwise or Regression::Multiple.pairwise or instance directly the engines.
6
+ # You can call Statsample::Regression::Multiple.listwise, Statsample::Regression::Multiple.pairwise or instance directly the engines.
7
7
  #
8
- # Example.
8
+ # Use:
9
9
  #
10
10
  # require 'statsample'
11
- # @a=[1,3,2,4,3,5,4,6,5,7].to_vector(:scale)
12
- # @b=[3,3,4,4,5,5,6,6,4,4].to_vector(:scale)
13
- # @c=[11,22,30,40,50,65,78,79,99,100].to_vector(:scale)
14
- # @y=[3,4,5,6,7,8,9,10,20,30].to_vector(:scale)
15
- # ds={'a'=>@a,'b'=>@b,'c'=>@c,'y'=>@y}.to_dataset
16
- # lr=Statsample::Regression::Multiple.listwise(ds,'y')
17
- # #<Statsample::Regression::Multiple::AlglibEngine:0x7f21912e4758 @ds_valid=#<Statsample::Dataset:69891073182680 @fields=[a,b,c,y] labels={"a"=>nil, "b"=>nil, "y"=>nil, "c"=>nil} cases=10, @lr=#<Alglib::LinearRegression:0x7f21912df118 @model=#<Alglib_ext::LinearModel:0x7f21912df708>, @ivars=3, @cases=10, @report=#<Alglib_ext::LrReport:0x7f21912df168>>, @y_var="y", @ds=#<Statsample::Dataset:69891073182680 @fields=[a,b,c,y] labels={"a"=>nil, "b"=>nil, "y"=>nil, "c"=>nil} cases=10, @fields=["a", "b", "c"], @lr_s=nil, @dep_columns=[[1, 3, 2, 4, 3, 5, 4, 6, 5, 7], [3, 3, 4, 4, 5, 5, 6, 6, 4, 4], [11, 22, 30, 40, 50, 65, 78, 79, 99, 100]], @ds_indep=#<Statsample::Dataset:69891073180060 @fields=[a,b,c] labels={"a"=>nil, "b"=>nil, "c"=>nil} cases=10, @dy=Vector(type:scale, n:10)[3,4,5,6,7,8,9,10,20,30]>
11
+ # a=1000.times.collect {rand}.to_scale
12
+ # b=1000.times.collect {rand}.to_scale
13
+ # c=1000.times.collect {rand}.to_scale
14
+ # ds={'a'=>a,'b'=>b,'c'=>c}.to_dataset
15
+ # ds['y']=ds.collect{|row| row['a']*5+row['b']*3+row['c']*2+rand()}
16
+ # lr=Statsample::Regression::Multiple.listwise(ds,'y')
17
+ # puts lr.summary
18
+ # Summary for regression of a,b,c over y
19
+ # *************************************************************
20
+ # Engine: Statsample::Regression::Multiple::AlglibEngine
21
+ # Cases(listwise)=1000(1000)
22
+ # r=0.986
23
+ # r2=0.973
24
+ # Equation=0.504+5.011a + 2.995b + 1.988c
25
+ # ----------------------------
26
+ # ANOVA TABLE
27
+ # --------------------------------------------------------------
28
+ # | source | ss | df | ms | f | s |
29
+ # --------------------------------------------------------------
30
+ # | Regression | 2979.321 | 3 | 993.107 | 12040.067 | 0.000 |
31
+ # | Error | 82.154 | 996 | 0.082 | | |
32
+ # | Total | 3061.475 | 999 | | | |
33
+ # --------------------------------------------------------------
34
+ # Beta coefficientes
35
+ # -----------------------------------------------
36
+ # | coeff | b | beta | se | t |
37
+ # -----------------------------------------------
38
+ # | Constant | 0.504 | - | 0.030 | 16.968 |
39
+ # | a | 5.011 | 0.832 | 0.031 | 159.486 |
40
+ # | b | 2.995 | 0.492 | 0.032 | 94.367 |
41
+ # | c | 1.988 | 0.323 | 0.032 | 62.132 |
42
+ # -----------------------------------------------
43
+ #
18
44
  module Multiple
19
45
  # Creates an object for listwise regression.
20
46
  # Alglib is faster, so it is preferred over GSL
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: statsample
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.6.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Claudio Bustos
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-02-06 00:00:00 -03:00
12
+ date: 2010-02-08 00:00:00 -03:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency