statsample 0.3.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -1,3 +1,8 @@
1
+ === 0.3.1 / 2009-08-03
2
+
3
+ * Names and logic of the Regression classes changed. There is now a Regression::Simple class and a Regression::Multiple module with two engines: RubyEngine and AlglibEngine
4
+ * New Crosstab#summary
5
+
1
6
  === 0.3.0 / 2009-08-02
2
7
 
3
8
  * Statsample renamed to Statsample
data/Manifest.txt CHANGED
@@ -5,15 +5,20 @@ Rakefile
5
5
  bin/statsample
6
6
  demo/benchmark.rb
7
7
  demo/chi-square.rb
8
+ demo/crosstab.rb
8
9
  demo/dice.rb
9
10
  demo/distribution_t.rb
10
11
  demo/graph.rb
11
12
  demo/item_analysis.rb
12
13
  demo/mean.rb
14
+ demo/nunnally_6.rb
13
15
  demo/proportion.rb
16
+ demo/regression.rb
14
17
  demo/sample_test.csv
15
18
  demo/strata_proportion.rb
16
19
  demo/stratum.rb
20
+ demo/t-student.rb
21
+ lib/spss.rb
17
22
  lib/statsample.rb
18
23
  lib/statsample/anova.rb
19
24
  lib/statsample/bivariate.rb
@@ -25,19 +30,22 @@ lib/statsample/dataset.rb
25
30
  lib/statsample/dominanceanalysis.rb
26
31
  lib/statsample/dominanceanalysis/bootstrap.rb
27
32
  lib/statsample/graph/gdchart.rb
28
- lib/statsample/graph/svggraph.rb
29
33
  lib/statsample/graph/svgboxplot.rb
34
+ lib/statsample/graph/svggraph.rb
30
35
  lib/statsample/graph/svghistogram.rb
31
36
  lib/statsample/graph/svgscatterplot.rb
32
37
  lib/statsample/htmlreport.rb
33
38
  lib/statsample/multiset.rb
34
39
  lib/statsample/regression.rb
40
+ lib/statsample/regression/multiple.rb
41
+ lib/statsample/regression/multiple/alglibengine.rb
42
+ lib/statsample/regression/multiple/rubyengine.rb
43
+ lib/statsample/regression/simple.rb
35
44
  lib/statsample/reliability.rb
36
45
  lib/statsample/resample.rb
37
46
  lib/statsample/srs.rb
38
47
  lib/statsample/test.rb
39
48
  lib/statsample/vector.rb
40
- lib/spss.rb
41
49
  test/_test_chart.rb
42
50
  test/test_anova.rb
43
51
  test/test_codification.rb
@@ -50,7 +58,10 @@ test/test_multiset.rb
50
58
  test/test_regression.rb
51
59
  test/test_reliability.rb
52
60
  test/test_resample.rb
61
+ test/test_srs.rb
53
62
  test/test_statistics.rb
54
63
  test/test_stratified.rb
55
64
  test/test_svg_graph.rb
56
65
  test/test_vector.rb
66
+ test/test_xls.rb
67
+ test/test_xls.xls
data/demo/benchmark.rb CHANGED
@@ -29,7 +29,7 @@ v.type=:scale
29
29
  if (true)
30
30
  Benchmark.bm(7) do |x|
31
31
  x.report("mean") { for i in 1..n; v.mean; end }
32
- x.report("slow_mean") { for i in 1..n; v.slow_mean; end }
32
+ x.report("slow_mean") { for i in 1..n; v.mean_slow; end }
33
33
 
34
34
  end
35
35
 
data/demo/crosstab.rb ADDED
@@ -0,0 +1,7 @@
1
+ require './../lib/statsample'
2
+ a=[1,1,1,1,1,1,1,2,2,2,2,2,3,3,3].to_vector
3
+ b=[1,2,3,2,2,2,1,1,1,2,2,1,2,2,3].to_vector
4
+
5
+ ct=Statsample::Crosstab.new(a,b)
6
+ puts ct.summary
7
+
@@ -0,0 +1,34 @@
1
+ require File.dirname(__FILE__)+'/../lib/statsample'
2
+
3
+ x1=[7,12,15,10,19,13,10,12,15,14].to_vector(:scale)
4
+ x2=[9,6,8,8,9,8,6,8,10,9].to_vector(:scale)
5
+ x3=[7,15,13,9,12,12,13,11,9,10].to_vector(:scale)
6
+
7
+ puts Statsample::Bivariate.pearson(x1,x2)
8
+ puts Statsample::Bivariate.pearson(x2,x3)
9
+ puts Statsample::Bivariate.pearson(x1,x3)
10
+
11
+ puts "Residual x1.x3"
12
+ res1=Statsample::Bivariate.residuals(x1,x3)
13
+ puts res1
14
+ puts "Residual x2.x3"
15
+ res2=Statsample::Bivariate.residuals(x2,x3)
16
+ puts res2
17
+
18
+ puts "Residual x1.x2"
19
+ res3=Statsample::Bivariate.residuals(x1,x2)
20
+ puts res3
21
+ puts "Residual x3.x2"
22
+ res4=Statsample::Bivariate.residuals(x3,x2)
23
+ puts res4
24
+
25
+ puts "Partial correlation de 1 y 2, controlando 3"
26
+ puts Statsample::Bivariate.pearson(res1,res2)
27
+ puts Statsample::Bivariate.partial_correlation(x1,x2,x3)
28
+
29
+ puts "Partial correlation de 1 y 3, controlando 2"
30
+ puts Statsample::Bivariate.pearson(res3,res4)
31
+ puts Statsample::Bivariate.partial_correlation(x1,x3,x2)
32
+
33
+ puts "Partial correlation de 2 y 3, controlando 1"
34
+ puts Statsample::Bivariate.partial_correlation(x2,x3,x1)
data/demo/proportion.rb CHANGED
@@ -7,7 +7,7 @@ tests=3000
7
7
  sample_size=100
8
8
  # rand a 50%
9
9
  poblacion=([1]*500+[0]*500).to_vector(:scale)
10
- prop=poblacion.proportion(1.0)
10
+ prop=poblacion.proportion(1)
11
11
  puts "Estadísticos"
12
12
  puts "DE con reemplazo:"+Statsample::SRS.proportion_sd_kp_wr(prop, sample_size).to_s
13
13
  puts "DE sin reemplazo:"+Statsample::SRS.proportion_sd_kp_wor(prop, sample_size,poblacion.size).to_s
@@ -0,0 +1,46 @@
1
+ require File.dirname(__FILE__)+'/../lib/statsample'
2
+ tests=300
3
+ include Statsample
4
+ r = GSL::Rng.alloc(GSL::Rng::TAUS,Time.now.to_i)
5
+ ds=Dataset.new(%w{a b c d y})
6
+ ds['a'].type=:scale
7
+ ds['b'].type=:scale
8
+ ds['c'].type=:scale
9
+ ds['d'].type=:scale
10
+ ds['y'].type=:scale
11
+
12
+ tests.times {
13
+ a=r.ugaussian
14
+ b=r.ugaussian
15
+ c=r.ugaussian
16
+ d=r.ugaussian
17
+ y=a*70+b*30+c*5+r.ugaussian*5
18
+ ds.add_case_array([a,b,c,d,y])
19
+ }
20
+ ds.update_valid_data
21
+
22
+ if !File.exists? "regression.dab"
23
+ da=DominanceAnalysis::Bootstrap.new(ds,"y")
24
+ else
25
+ da=Statsample.load("regression.dab")
26
+ end
27
+
28
+ da.lr_class=Regression::Multiple::AlglibEngine
29
+ da.bootstrap(20)
30
+
31
+ puts da.summary
32
+ da.save("regression.dab")
33
+
34
+ lr=Regression::Multiple.listwise(ds,"y")
35
+
36
+ hr=HtmlReport.new("Regression")
37
+ hr.add_summary("Regression",lr.summary(HtmlSummary))
38
+ hr.add_summary("Analisis de Dominancia ", da.da.summary(HtmlSummary))
39
+
40
+ hr.add_summary("Analisis de Dominancia (Bootstrap)", da.summary(HtmlSummary))
41
+
42
+ da.fields.each{|f|
43
+ hr.add_histogram("General Dominance #{f}",da.samples_ga[f].to_vector(:scale))
44
+ }
45
+ hr.save("Regression Dominance.html")
46
+
data/demo/t-student.rb ADDED
@@ -0,0 +1,17 @@
1
+ require File.dirname(__FILE__)+"/../lib/statsample"
2
+
3
+
4
+ tests=3000
5
+
6
+ r = GSL::Rng.alloc(GSL::Rng::TAUS, 1)
7
+ sample_sizes=[5,10,20,30]
8
+ sample_sizes.each{|sample_size|
9
+ monte=Statsample::Resample.repeat_and_save(tests) {
10
+ v=[]
11
+ sample_size.times{|i|
12
+ v.push(r.ugaussian)
13
+ }
14
+ v.to_vector(:scale).mean
15
+
16
+ }
17
+ }
data/lib/statsample.rb CHANGED
@@ -58,7 +58,7 @@ end
58
58
  # :startdoc:
59
59
  #
60
60
  module Statsample
61
- VERSION = '0.3.0'
61
+ VERSION = '0.3.1'
62
62
  SPLIT_TOKEN = ","
63
63
  autoload(:Database, 'statsample/converters')
64
64
  autoload(:Anova, 'statsample/anova')
@@ -74,7 +74,6 @@ module Statsample
74
74
  autoload(:Reliability, 'statsample/reliability')
75
75
  autoload(:Bivariate, 'statsample/bivariate')
76
76
  autoload(:Multivariate, 'statsample/multivariate')
77
-
78
77
  autoload(:Regression, 'statsample/regression')
79
78
  autoload(:Test, 'statsample/test')
80
79
  def self.load(filename)
@@ -134,10 +133,10 @@ module Statsample
134
133
  end
135
134
  class ReportTable
136
135
  attr_reader :header
137
- def initialize(header=[])
138
- @header=header
136
+ def initialize(h=[])
139
137
  @rows=[]
140
138
  @max_cols=[]
139
+ self.header=(h)
141
140
  end
142
141
  def add_row(row)
143
142
  row.each_index{|i|
@@ -62,7 +62,7 @@ module Statsample
62
62
  # Chi square, based on expected and real matrix
63
63
  def chi_square
64
64
  require 'statsample/test'
65
- Statsample::Test.chi_square(self.to_matrix,matrix_expected)
65
+ Statsample::Test.chi_square(self.to_matrix, matrix_expected)
66
66
  end
67
67
  # Useful to obtain chi square
68
68
  def matrix_expected
@@ -78,6 +78,39 @@ module Statsample
78
78
  }
79
79
  Matrix.rows(m)
80
80
  end
81
+ def summary(report_type=ConsoleSummary)
82
+ out=""
83
+ out.extend report_type
84
+ fq=frequencies
85
+ rn=rows_names
86
+ cn=cols_names
87
+ total=0
88
+ total_cols=cn.inject({}) {|a,x| a[x]=0;a}
89
+ out.add "Chi Square: #{chi_square}"
90
+ t=Statsample::ReportTable.new([""]+cols_names+["Total"])
91
+ rn.each{|row|
92
+ total_row=0
93
+ t_row=[@v_rows.labeling(row)]
94
+ cn.each{|col|
95
+ data=fq[[row,col]]
96
+ total_row+=fq[[row,col]]
97
+ total+=fq[[row,col]]
98
+ total_cols[col]+=fq[[row,col]]
99
+ t_row.push(data)
100
+ }
101
+ t_row.push(total_row)
102
+ t.add_row(t_row)
103
+ }
104
+ t.add_horizontal_line
105
+ t_row=["Total"]
106
+ cn.each{|v|
107
+ t_row.push(total_cols[v])
108
+ }
109
+ t_row.push(total)
110
+ t.add_row(t_row)
111
+ out.parse_table(t)
112
+ out
113
+ end
81
114
  def to_s
82
115
  fq=frequencies
83
116
  rn=rows_names
@@ -1,7 +1,7 @@
1
1
  require 'statsample/dominanceanalysis/bootstrap'
2
2
  module Statsample
3
3
  class DominanceAnalysis
4
- def initialize(ds,y_var, r_class = Regression::MultipleRegressionPairwise)
4
+ def initialize(ds,y_var, r_class = Regression::Multiple::RubyEngine)
5
5
  @y_var=y_var
6
6
  @dy=ds[@y_var]
7
7
  @ds=ds
@@ -220,7 +220,7 @@ module Statsample
220
220
  @name=name
221
221
  @fields=fields
222
222
  @contributions=@fields.inject({}){|a,v| a[v]=nil;a}
223
- r_class=Regression::MultipleRegressionPairwise if r_class.nil?
223
+ r_class=Regression::Multiple::RubyEngine if r_class.nil?
224
224
  @lr=r_class.new(ds,y_var)
225
225
  end
226
226
  def add_contribution(f,v)
@@ -11,7 +11,7 @@ class DominanceAnalysis
11
11
  @fields=ds.fields-[y_var]
12
12
  @samples_ga=@fields.inject({}){|a,v| a[v]=[];a}
13
13
  @n_samples=0
14
- @lr_class=Regression::MultipleRegressionPairwise
14
+ @lr_class=Regression::Multiple::RubyEngine
15
15
  create_samples_pairs
16
16
  end
17
17
  def lr_class=(lr)
@@ -68,6 +68,7 @@ class DominanceAnalysis
68
68
  out.add "Summary for Bootstrap Dominance Analysis of "+@fields.join(", ")+" over "+@y_var+"\n"
69
69
  out.add "Size of sample: #{@n_samples}\n"
70
70
  out.add "t:#{t}\n"
71
+ out.add "Linear Regression Engine: #{@lr_class.name}"
71
72
  out.nl
72
73
  table=ReportTable.new
73
74
  header=["pairs","sD","Dij","SE(Dij)","Pij","Pji","Pno","Reprod"]
@@ -1,522 +1,10 @@
1
+ require 'statsample/regression/simple'
2
+ require 'statsample/regression/multiple'
3
+ require 'statsample/regression/multiple/alglibengine'
4
+ require 'statsample/regression/multiple/rubyengine'
5
+
1
6
  module Statsample
2
- # module for regression methods
7
+ # Module for regression procedures
3
8
  module Regression
4
- # Class for calculation of linear regressions
5
- # To create a SimpleRegression object:
6
- # * <tt> SimpleRegression.new_from_vectors(vx,vy)</tt>
7
- # * <tt> SimpleRegression.new_from_gsl(gsl) </tt>
8
- #
9
- class SimpleRegression
10
- attr_accessor :a,:b,:cov00, :cov01, :covx1, :chisq, :status
11
- private_class_method :new
12
- def initialize(init_method, *argv)
13
- self.send(init_method, *argv)
14
- end
15
- def y(val_x)
16
- @a+@b*val_x
17
- end
18
- def x(val_y)
19
- (val_y-@a) / @b.to_f
20
- end
21
- # Sum of square error
22
- def sse
23
- (0...@vx.size).inject(0) {|acum,i|
24
- acum+((@vy[i]-y(@vx[i]))**2)
25
- }
26
- end
27
- def standard_error
28
- Math::sqrt(sse / (@vx.size-2).to_f)
29
- end
30
- # Sum of square regression
31
- def ssr
32
- vy_mean=@vy.mean
33
- (0...@vx.size).inject(0) {|a,i|
34
- a+((y(@vx[i])-vy_mean)**2)
35
- }
36
-
37
- end
38
- # Sum of square total
39
- def sst
40
- @vy.sum_of_squared_deviation
41
- end
42
- # Value of r
43
- def r
44
- @b * (@vx.sds / @vy.sds)
45
- end
46
- # Value of r^2
47
- def r2
48
- r**2
49
- end
50
- class << self
51
- def new_from_gsl(ar)
52
- new(:init_gsl, *ar)
53
- end
54
- def new_from_vectors(vx,vy)
55
- new(:init_vectors,vx,vy)
56
- end
57
- end
58
- def init_vectors(vx,vy)
59
- @vx,@vy=Statsample.only_valid(vx,vy)
60
- x_m=@vx.mean
61
- y_m=@vy.mean
62
- num=den=0
63
- (0...@vx.size).each {|i|
64
- num+=(@vx[i]-x_m)*(@vy[i]-y_m)
65
- den+=(@vx[i]-x_m)**2
66
- }
67
- @b=num.to_f/den
68
- @a=y_m - @b*x_m
69
- end
70
- def init_gsl(a,b,cov00, cov01, covx1, chisq, status)
71
- @a=a
72
- @b=b
73
- @cov00=cov00
74
- @cov01=cov01
75
- @covx1=covx1
76
- @chisq=chisq
77
- @status=status
78
- end
79
- end
80
-
81
-
82
- class MultipleRegressionBase
83
- def initialize(ds,y_var)
84
- @ds=ds
85
- @y_var=y_var
86
- @r2=nil
87
-
88
- end
89
- def assign_names(c)
90
- a={}
91
- @fields.each_index {|i|
92
- a[@fields[i]]=c[i]
93
- }
94
- a
95
- end
96
- def predicted
97
- (0...@ds.cases).collect { |i|
98
- invalid=false
99
- vect=@dep_columns.collect {|v| invalid=true if v[i].nil?; v[i]}
100
- if invalid
101
- nil
102
- else
103
- process(vect)
104
- end
105
- }.to_vector(:scale)
106
- end
107
- def standarized_predicted
108
- predicted.standarized
109
- end
110
- def residuals
111
- (0...@ds.cases).collect{|i|
112
- invalid=false
113
- vect=@dep_columns.collect{|v| invalid=true if v[i].nil?; v[i]}
114
- if invalid or @ds[@y_var][i].nil?
115
- nil
116
- else
117
- @ds[@y_var][i] - process(vect)
118
- end
119
- }.to_vector(:scale)
120
- end
121
- def r
122
- raise "You should implement this"
123
- end
124
- def sst
125
- raise "You should implement this"
126
- end
127
- def ssr
128
- r2*sst
129
- end
130
- def sse
131
- sst - ssr
132
- end
133
-
134
- def coeffs_t
135
- out={}
136
- se=coeffs_se
137
- coeffs.each{|k,v|
138
- out[k]=v / se[k]
139
- }
140
- out
141
- end
142
-
143
- def mse
144
- sse/df_e
145
- end
146
-
147
- def df_r
148
- @dep_columns.size
149
- end
150
- def df_e
151
- @ds_valid.cases-@dep_columns.size-1
152
- end
153
- def f
154
- (ssr.quo(df_r)).quo(sse.quo(df_e))
155
- end
156
- # Significance of Fisher
157
- def significance
158
- if HAS_GSL
159
- GSL::Cdf.fdist_Q(f,df_r,df_e)
160
- else
161
- raise "Need Ruby/GSL"
162
- end
163
- end
164
- # Tolerance for a given variable
165
- # http://talkstats.com/showthread.php?t=5056
166
- def tolerance(var)
167
- ds=assign_names(@dep_columns)
168
- ds.each{|k,v|
169
- ds[k]=v.to_vector(:scale)
170
- }
171
- if HAS_ALGIB
172
- lr_class=::Statsample::Regression::MultipleRegressionAlglib
173
- ds=ds.to_dataset
174
- else
175
- lr_class=MultipleRegressionPairwise
176
- ds=ds.to_dataset.dup_only_valid
177
- end
178
- lr=lr_class.new(ds,var)
179
- 1-lr.r2
180
- end
181
- def coeffs_tolerances
182
- @fields.inject({}) {|a,f|
183
- a[f]=tolerance(f);
184
- a
185
- }
186
- end
187
- def coeffs_se
188
- out={}
189
- mse=sse.quo(df_e)
190
- coeffs.each {|k,v|
191
- out[k]=Math::sqrt(mse/(@ds[k].sum_of_squares*tolerance(k)))
192
- }
193
- out
194
- end
195
- def estimated_variance_covariance_matrix
196
- mse_p=mse
197
- columns=[]
198
- @ds_valid.each_vector{|k,v|
199
- columns.push(v.data) unless k==@y_var
200
- }
201
- columns.unshift([1.0]*@ds_valid.cases)
202
- x=Matrix.columns(columns)
203
- matrix=((x.t*x)).inverse * mse
204
- matrix.collect {|i|
205
-
206
- Math::sqrt(i) if i>0
207
- }
208
- end
209
- def constant_t
210
- constant.to_f/constant_se
211
- end
212
- def constant_se
213
- estimated_variance_covariance_matrix[0,0]
214
- end
215
- def summary(report_type=ConsoleSummary)
216
- c=coeffs
217
- out=""
218
- out.extend report_type
219
- out.add <<HEREDOC
220
- Summary for regression of #{@fields.join(',')} over #{@y_var}"
221
- *************************************************************
222
- Cases(listwise)=#{@ds.cases}(#{@ds_valid.cases})
223
- r=#{sprintf("%0.3f",r)}
224
- r2=#{sprintf("%0.3f",r2)}
225
- ssr=#{sprintf("%0.3f",ssr)}
226
- sse=#{sprintf("%0.3f",sse)}
227
- sst=#{sprintf("%0.3f",sst)}
228
- F#{sprintf("(%d,%d)=%0.3f, p=%0.3f",df_r,df_e,f,significance)}
229
- Equation=#{sprintf("%0.3f",constant)}+#{@fields.collect {|k| sprintf("%0.3f%s",c[k],k)}.join(' + ')}
230
-
231
- HEREDOC
232
-
233
- end
234
-
235
-
236
- # Deprecated
237
- # Sum of squares of error (manual calculation)
238
- # using the predicted value minus the y_i value
239
- def sse_manual
240
- pr=predicted
241
- cases=0
242
- sse=(0...@ds.cases).inject(0) {|a,i|
243
- if !@dy.data_with_nils[i].nil? and !pr[i].nil?
244
- cases+=1
245
- a+((pr[i]-@dy[i])**2)
246
- else
247
- a
248
- end
249
- }
250
- sse*(min_n_valid-1.0).quo(cases-1)
251
- end
252
- # Sum of squares of regression
253
- # using the predicted value minus y mean
254
- def ssr_direct
255
- mean=@dy.mean
256
- cases=0
257
- ssr=(0...@ds.cases).inject(0) {|a,i|
258
- invalid=false
259
- v=@dep_columns.collect{|c| invalid=true if c[i].nil?; c[i]}
260
- if !invalid
261
- cases+=1
262
- a+((process(v)-mean)**2)
263
- else
264
- a
265
- end
266
- }
267
- ssr
268
- end
269
- def sse_direct
270
- sst-ssr
271
- end
272
-
273
-
274
- end
275
-
276
-
277
-
278
-
279
-
280
- if HAS_ALGIB
281
- # Class for calculation of multiple regression.
282
- # Requires Alglib gem.
283
- # To create a SimpleRegression object:
284
- # @a=[1,3,2,4,3,5,4,6,5,7].to_vector(:scale)
285
- # @b=[3,3,4,4,5,5,6,6,4,4].to_vector(:scale)
286
- # @c=[11,22,30,40,50,65,78,79,99,100].to_vector(:scale)
287
- # @y=[3,4,5,6,7,8,9,10,20,30].to_vector(:scale)
288
- # ds={'a'=>@a,'b'=>@b,'c'=>@c,'y'=>@y}.to_dataset
289
- # lr=Statsample::Regression::MultipleRegression.new(ds,'y')
290
- #
291
- class MultipleRegressionAlglib < MultipleRegressionBase
292
- def initialize(ds,y_var)
293
- @ds=ds.dup_only_valid
294
- @ds_valid=@ds
295
- @y_var=y_var
296
- @dy=@ds[@y_var]
297
- @ds_indep=ds.dup(ds.fields-[y_var])
298
- # Create a custom matrix
299
- columns=[]
300
- @fields=[]
301
- @ds.fields.each{|f|
302
- if f!=@y_var
303
- columns.push(@ds[f].to_a)
304
- @fields.push(f)
305
- end
306
- }
307
- @dep_columns=columns.dup
308
- columns.push(@ds[@y_var])
309
- matrix=Matrix.columns(columns)
310
- @lr_s=nil
311
- @lr=::Alglib::LinearRegression.build_from_matrix(matrix)
312
- end
313
-
314
- def _dump(i)
315
- Marshal.dump({'ds'=>@ds,'y_var'=>@y_var})
316
- end
317
- def self._load(data)
318
- h=Marshal.load(data)
319
- MultipleRegression.new(h['ds'], h['y_var'])
320
- end
321
-
322
- def coeffs
323
- assign_names(@lr.coeffs)
324
- end
325
- # Coefficients using a constant
326
- # Based on http://www.xycoon.com/ols1.htm
327
- def matrix_resolution
328
- mse_p=mse
329
- columns=@dep_columns.dup.map {|xi| xi.map{|i| i.to_f}}
330
- columns.unshift([1.0]*@ds.cases)
331
- y=Matrix.columns([@dy.data.map {|i| i.to_f}])
332
- x=Matrix.columns(columns)
333
- xt=x.t
334
- matrix=((xt*x)).inverse*xt
335
- matrix*y
336
- end
337
- def r2
338
- r**2
339
- end
340
- def r
341
- Bivariate::pearson(@dy,predicted)
342
- end
343
- def sst
344
- @dy.ss
345
- end
346
- def constant
347
- @lr.constant
348
- end
349
- def standarized_coeffs
350
- l=lr_s
351
- assign_names(l.coeffs)
352
- end
353
- def lr_s
354
- if @lr_s.nil?
355
- build_standarized
356
- end
357
- @lr_s
358
- end
359
- def build_standarized
360
- @ds_s=@ds.standarize
361
- columns=[]
362
- @ds_s.fields.each{|f|
363
- columns.push(@ds_s[f].to_a) unless f==@y_var
364
- }
365
- @dep_columns_s=columns.dup
366
- columns.push(@ds_s[@y_var])
367
- matrix=Matrix.columns(columns)
368
- @lr_s=Alglib::LinearRegression.build_from_matrix(matrix)
369
- end
370
- def process(v)
371
- @lr.process(v)
372
- end
373
- def process_s(v)
374
- lr_s.process(v)
375
- end
376
- # ???? Not equal to SPSS output
377
- def standarized_residuals
378
- res=residuals
379
- red_sd=residuals.sds
380
- res.collect {|v|
381
- v.quo(red_sd)
382
- }.to_vector(:scale)
383
- end
384
- end
385
- end
386
-
387
-
388
-
389
-
390
-
391
-
392
-
393
-
394
-
395
-
396
-
397
-
398
- class MultipleRegressionPairwise < MultipleRegressionBase
399
- def initialize(ds,y_var)
400
- super
401
- @dy=ds[@y_var]
402
- @ds_valid=ds.dup_only_valid
403
- @ds_indep=ds.dup(ds.fields-[y_var])
404
- @fields=@ds_indep.fields
405
- set_dep_columns
406
- obtain_y_vector
407
- @matrix_x = Bivariate.correlation_matrix(@ds_indep)
408
- @coeffs_stan=(@matrix_x.inverse * @matrix_y).column(0).to_a
409
- @min_n_valid=nil
410
- end
411
- def min_n_valid
412
- if @min_n_valid.nil?
413
- min=@ds.cases
414
- m=Bivariate::n_valid_matrix(@ds)
415
- for x in 0...m.row_size
416
- for y in 0...m.column_size
417
- min=m[x,y] if m[x,y] < min
418
- end
419
- end
420
- @min_n_valid=min
421
- end
422
- @min_n_valid
423
- end
424
- def set_dep_columns
425
- @dep_columns=[]
426
- @ds_indep.each_vector{|k,v|
427
- @dep_columns.push(v.data_with_nils)
428
- }
429
- end
430
- # Sum of square total
431
- def sst
432
- #if @sst.nil?
433
- @sst=@dy.variance*(min_n_valid-1.0)
434
- #end
435
- @sst
436
- end
437
- def r2
438
- if @r2.nil?
439
- c=@matrix_y
440
- rxx=obtain_predictor_matrix
441
- matrix=(c.t*rxx.inverse*c)
442
- @r2=matrix[0,0]
443
- end
444
- @r2
445
- end
446
- def r
447
- Math::sqrt(r2)
448
- end
449
-
450
- def df_e
451
- min_n_valid-@dep_columns.size-1
452
- end
453
- def fix_with_mean
454
- i=0
455
- @ds_indep.each{|row|
456
- empty=[]
457
- row.each{|k,v|
458
- empty.push(k) if v.nil?
459
- }
460
- if empty.size==1
461
- @ds_indep[empty[0]][i]=@ds[empty[0]].mean
462
- end
463
- i+=1
464
- }
465
- @ds_indep.update_valid_data
466
- set_dep_columns
467
- end
468
- def fix_with_regression
469
- i=0
470
- @ds_indep.each{|row|
471
- empty=[]
472
- row.each{|k,v|
473
- empty.push(k) if v.nil?
474
- }
475
- if empty.size==1
476
- field=empty[0]
477
- lr=MultipleRegression.new(@ds_indep,field)
478
- fields=[]
479
- @ds_indep.fields.each{|f|
480
- fields.push(row[f]) unless f==field
481
- }
482
- @ds_indep[field][i]=lr.process(fields)
483
- end
484
- i+=1
485
- }
486
- @ds_indep.update_valid_data
487
- set_dep_columns
488
- end
489
- def obtain_y_vector
490
- @matrix_y=Matrix.columns([@ds_indep.fields.collect{|f|
491
- Bivariate.pearson(@dy, @ds_indep[f])
492
- }])
493
- end
494
- def obtain_predictor_matrix
495
- Bivariate::correlation_matrix(@ds_indep)
496
- end
497
- def constant
498
- c=coeffs
499
- @dy.mean-@fields.inject(0){|a,k| a+(c[k] * @ds_indep[k].mean)}
500
- end
501
- def process(v)
502
- c=coeffs
503
- total=constant
504
- @fields.each_index{|i|
505
- total+=c[@fields[i]]*v[i]
506
- }
507
- total
508
- end
509
- def coeffs
510
- sc=standarized_coeffs
511
- assign_names(@fields.collect{|f|
512
- (sc[f]*@dy.sds).quo(@ds_indep[f].sds)
513
- })
514
- end
515
- def standarized_coeffs
516
- assign_names(@coeffs_stan)
517
- end
518
- end
519
-
520
-
521
9
  end
522
10
  end