statsample 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt CHANGED
@@ -1,3 +1,8 @@
1
+ === 0.3.1 / 2009-08-03
2
+
3
+ * Name and logic of Regression classes changed. You now have a Regression::Simple class and a Regression::Multiple module with two engines: RubyEngine and AlglibEngine
4
+ * New Crosstab#summary
5
+
1
6
  === 0.3.0 / 2009-08-02
2
7
 
3
8
  * Statsample renamed to Statsample
data/Manifest.txt CHANGED
@@ -5,15 +5,20 @@ Rakefile
5
5
  bin/statsample
6
6
  demo/benchmark.rb
7
7
  demo/chi-square.rb
8
+ demo/crosstab.rb
8
9
  demo/dice.rb
9
10
  demo/distribution_t.rb
10
11
  demo/graph.rb
11
12
  demo/item_analysis.rb
12
13
  demo/mean.rb
14
+ demo/nunnally_6.rb
13
15
  demo/proportion.rb
16
+ demo/regression.rb
14
17
  demo/sample_test.csv
15
18
  demo/strata_proportion.rb
16
19
  demo/stratum.rb
20
+ demo/t-student.rb
21
+ lib/spss.rb
17
22
  lib/statsample.rb
18
23
  lib/statsample/anova.rb
19
24
  lib/statsample/bivariate.rb
@@ -25,19 +30,22 @@ lib/statsample/dataset.rb
25
30
  lib/statsample/dominanceanalysis.rb
26
31
  lib/statsample/dominanceanalysis/bootstrap.rb
27
32
  lib/statsample/graph/gdchart.rb
28
- lib/statsample/graph/svggraph.rb
29
33
  lib/statsample/graph/svgboxplot.rb
34
+ lib/statsample/graph/svggraph.rb
30
35
  lib/statsample/graph/svghistogram.rb
31
36
  lib/statsample/graph/svgscatterplot.rb
32
37
  lib/statsample/htmlreport.rb
33
38
  lib/statsample/multiset.rb
34
39
  lib/statsample/regression.rb
40
+ lib/statsample/regression/multiple.rb
41
+ lib/statsample/regression/multiple/alglibengine.rb
42
+ lib/statsample/regression/multiple/rubyengine.rb
43
+ lib/statsample/regression/simple.rb
35
44
  lib/statsample/reliability.rb
36
45
  lib/statsample/resample.rb
37
46
  lib/statsample/srs.rb
38
47
  lib/statsample/test.rb
39
48
  lib/statsample/vector.rb
40
- lib/spss.rb
41
49
  test/_test_chart.rb
42
50
  test/test_anova.rb
43
51
  test/test_codification.rb
@@ -50,7 +58,10 @@ test/test_multiset.rb
50
58
  test/test_regression.rb
51
59
  test/test_reliability.rb
52
60
  test/test_resample.rb
61
+ test/test_srs.rb
53
62
  test/test_statistics.rb
54
63
  test/test_stratified.rb
55
64
  test/test_svg_graph.rb
56
65
  test/test_vector.rb
66
+ test/test_xls.rb
67
+ test/test_xls.xls
data/demo/benchmark.rb CHANGED
@@ -29,7 +29,7 @@ v.type=:scale
29
29
  if (true)
30
30
  Benchmark.bm(7) do |x|
31
31
  x.report("mean") { for i in 1..n; v.mean; end }
32
- x.report("slow_mean") { for i in 1..n; v.slow_mean; end }
32
+ x.report("slow_mean") { for i in 1..n; v.mean_slow; end }
33
33
 
34
34
  end
35
35
 
data/demo/crosstab.rb ADDED
@@ -0,0 +1,7 @@
1
+ require './../lib/statsample'
2
+ a=[1,1,1,1,1,1,1,2,2,2,2,2,3,3,3].to_vector
3
+ b=[1,2,3,2,2,2,1,1,1,2,2,1,2,2,3].to_vector
4
+
5
+ ct=Statsample::Crosstab.new(a,b)
6
+ puts ct.summary
7
+
@@ -0,0 +1,34 @@
1
+ require File.dirname(__FILE__)+'/../lib/statsample'
2
+
3
+ x1=[7,12,15,10,19,13,10,12,15,14].to_vector(:scale)
4
+ x2=[9,6,8,8,9,8,6,8,10,9].to_vector(:scale)
5
+ x3=[7,15,13,9,12,12,13,11,9,10].to_vector(:scale)
6
+
7
+ puts Statsample::Bivariate.pearson(x1,x2)
8
+ puts Statsample::Bivariate.pearson(x2,x3)
9
+ puts Statsample::Bivariate.pearson(x1,x3)
10
+
11
+ puts "Residual x1.x3"
12
+ res1=Statsample::Bivariate.residuals(x1,x3)
13
+ puts res1
14
+ puts "Residual x2.x3"
15
+ res2=Statsample::Bivariate.residuals(x2,x3)
16
+ puts res2
17
+
18
+ puts "Residual x1.x2"
19
+ res3=Statsample::Bivariate.residuals(x1,x2)
20
+ puts res3
21
+ puts "Residual x3.x2"
22
+ res4=Statsample::Bivariate.residuals(x3,x2)
23
+ puts res4
24
+
25
+ puts "Partial correlation de 1 y 2, controlando 3"
26
+ puts Statsample::Bivariate.pearson(res1,res2)
27
+ puts Statsample::Bivariate.partial_correlation(x1,x2,x3)
28
+
29
+ puts "Partial correlation de 1 y 3, controlando 2"
30
+ puts Statsample::Bivariate.pearson(res3,res4)
31
+ puts Statsample::Bivariate.partial_correlation(x1,x3,x2)
32
+
33
+ puts "Partial correlation de 2 y 3, controlando 1"
34
+ puts Statsample::Bivariate.partial_correlation(x2,x3,x1)
data/demo/proportion.rb CHANGED
@@ -7,7 +7,7 @@ tests=3000
7
7
  sample_size=100
8
8
  # rand a 50%
9
9
  poblacion=([1]*500+[0]*500).to_vector(:scale)
10
- prop=poblacion.proportion(1.0)
10
+ prop=poblacion.proportion(1)
11
11
  puts "Estadísticos"
12
12
  puts "DE con reemplazo:"+Statsample::SRS.proportion_sd_kp_wr(prop, sample_size).to_s
13
13
  puts "DE sin reemplazo:"+Statsample::SRS.proportion_sd_kp_wor(prop, sample_size,poblacion.size).to_s
@@ -0,0 +1,46 @@
1
+ require File.dirname(__FILE__)+'/../lib/statsample'
2
+ tests=300
3
+ include Statsample
4
+ r = GSL::Rng.alloc(GSL::Rng::TAUS,Time.now.to_i)
5
+ ds=Dataset.new(%w{a b c d y})
6
+ ds['a'].type=:scale
7
+ ds['b'].type=:scale
8
+ ds['c'].type=:scale
9
+ ds['d'].type=:scale
10
+ ds['y'].type=:scale
11
+
12
+ tests.times {
13
+ a=r.ugaussian
14
+ b=r.ugaussian
15
+ c=r.ugaussian
16
+ d=r.ugaussian
17
+ y=a*70+b*30+c*5+r.ugaussian*5
18
+ ds.add_case_array([a,b,c,d,y])
19
+ }
20
+ ds.update_valid_data
21
+
22
+ if !File.exists? "regression.dab"
23
+ da=DominanceAnalysis::Bootstrap.new(ds,"y")
24
+ else
25
+ da=Statsample.load("regression.dab")
26
+ end
27
+
28
+ da.lr_class=Regression::Multiple::AlglibEngine
29
+ da.bootstrap(20)
30
+
31
+ puts da.summary
32
+ da.save("regression.dab")
33
+
34
+ lr=Regression::Multiple.listwise(ds,"y")
35
+
36
+ hr=HtmlReport.new("Regression")
37
+ hr.add_summary("Regression",lr.summary(HtmlSummary))
38
+ hr.add_summary("Analisis de Dominancia ", da.da.summary(HtmlSummary))
39
+
40
+ hr.add_summary("Analisis de Dominancia (Bootstrap)", da.summary(HtmlSummary))
41
+
42
+ da.fields.each{|f|
43
+ hr.add_histogram("General Dominance #{f}",da.samples_ga[f].to_vector(:scale))
44
+ }
45
+ hr.save("Regression Dominance.html")
46
+
data/demo/t-student.rb ADDED
@@ -0,0 +1,17 @@
1
+ require File.dirname(__FILE__)+"/../lib/statsample"
2
+
3
+
4
+ tests=3000
5
+
6
+ r = GSL::Rng.alloc(GSL::Rng::TAUS, 1)
7
+ sample_sizes=[5,10,20,30]
8
+ sample_sizes.each{|sample_size|
9
+ monte=Statsample::Resample.repeat_and_save(tests) {
10
+ v=[]
11
+ sample_size.times{|i|
12
+ v.push(r.ugaussian)
13
+ }
14
+ v.to_vector(:scale).mean
15
+
16
+ }
17
+ }
data/lib/statsample.rb CHANGED
@@ -58,7 +58,7 @@ end
58
58
  # :startdoc:
59
59
  #
60
60
  module Statsample
61
- VERSION = '0.3.0'
61
+ VERSION = '0.3.1'
62
62
  SPLIT_TOKEN = ","
63
63
  autoload(:Database, 'statsample/converters')
64
64
  autoload(:Anova, 'statsample/anova')
@@ -74,7 +74,6 @@ module Statsample
74
74
  autoload(:Reliability, 'statsample/reliability')
75
75
  autoload(:Bivariate, 'statsample/bivariate')
76
76
  autoload(:Multivariate, 'statsample/multivariate')
77
-
78
77
  autoload(:Regression, 'statsample/regression')
79
78
  autoload(:Test, 'statsample/test')
80
79
  def self.load(filename)
@@ -134,10 +133,10 @@ module Statsample
134
133
  end
135
134
  class ReportTable
136
135
  attr_reader :header
137
- def initialize(header=[])
138
- @header=header
136
+ def initialize(h=[])
139
137
  @rows=[]
140
138
  @max_cols=[]
139
+ self.header=(h)
141
140
  end
142
141
  def add_row(row)
143
142
  row.each_index{|i|
@@ -62,7 +62,7 @@ module Statsample
62
62
  # Chi square, based on expected and real matrix
63
63
  def chi_square
64
64
  require 'statsample/test'
65
- Statsample::Test.chi_square(self.to_matrix,matrix_expected)
65
+ Statsample::Test.chi_square(self.to_matrix, matrix_expected)
66
66
  end
67
67
  # Useful to obtain chi square
68
68
  def matrix_expected
@@ -78,6 +78,39 @@ module Statsample
78
78
  }
79
79
  Matrix.rows(m)
80
80
  end
81
+ def summary(report_type=ConsoleSummary)
82
+ out=""
83
+ out.extend report_type
84
+ fq=frequencies
85
+ rn=rows_names
86
+ cn=cols_names
87
+ total=0
88
+ total_cols=cn.inject({}) {|a,x| a[x]=0;a}
89
+ out.add "Chi Square: #{chi_square}"
90
+ t=Statsample::ReportTable.new([""]+cols_names+["Total"])
91
+ rn.each{|row|
92
+ total_row=0
93
+ t_row=[@v_rows.labeling(row)]
94
+ cn.each{|col|
95
+ data=fq[[row,col]]
96
+ total_row+=fq[[row,col]]
97
+ total+=fq[[row,col]]
98
+ total_cols[col]+=fq[[row,col]]
99
+ t_row.push(data)
100
+ }
101
+ t_row.push(total_row)
102
+ t.add_row(t_row)
103
+ }
104
+ t.add_horizontal_line
105
+ t_row=["Total"]
106
+ cn.each{|v|
107
+ t_row.push(total_cols[v])
108
+ }
109
+ t_row.push(total)
110
+ t.add_row(t_row)
111
+ out.parse_table(t)
112
+ out
113
+ end
81
114
  def to_s
82
115
  fq=frequencies
83
116
  rn=rows_names
@@ -1,7 +1,7 @@
1
1
  require 'statsample/dominanceanalysis/bootstrap'
2
2
  module Statsample
3
3
  class DominanceAnalysis
4
- def initialize(ds,y_var, r_class = Regression::MultipleRegressionPairwise)
4
+ def initialize(ds,y_var, r_class = Regression::Multiple::RubyEngine)
5
5
  @y_var=y_var
6
6
  @dy=ds[@y_var]
7
7
  @ds=ds
@@ -220,7 +220,7 @@ module Statsample
220
220
  @name=name
221
221
  @fields=fields
222
222
  @contributions=@fields.inject({}){|a,v| a[v]=nil;a}
223
- r_class=Regression::MultipleRegressionPairwise if r_class.nil?
223
+ r_class=Regression::Multiple::RubyEngine if r_class.nil?
224
224
  @lr=r_class.new(ds,y_var)
225
225
  end
226
226
  def add_contribution(f,v)
@@ -11,7 +11,7 @@ class DominanceAnalysis
11
11
  @fields=ds.fields-[y_var]
12
12
  @samples_ga=@fields.inject({}){|a,v| a[v]=[];a}
13
13
  @n_samples=0
14
- @lr_class=Regression::MultipleRegressionPairwise
14
+ @lr_class=Regression::Multiple::RubyEngine
15
15
  create_samples_pairs
16
16
  end
17
17
  def lr_class=(lr)
@@ -68,6 +68,7 @@ class DominanceAnalysis
68
68
  out.add "Summary for Bootstrap Dominance Analysis of "+@fields.join(", ")+" over "+@y_var+"\n"
69
69
  out.add "Size of sample: #{@n_samples}\n"
70
70
  out.add "t:#{t}\n"
71
+ out.add "Linear Regression Engine: #{@lr_class.name}"
71
72
  out.nl
72
73
  table=ReportTable.new
73
74
  header=["pairs","sD","Dij","SE(Dij)","Pij","Pji","Pno","Reprod"]
@@ -1,522 +1,10 @@
1
+ require 'statsample/regression/simple'
2
+ require 'statsample/regression/multiple'
3
+ require 'statsample/regression/multiple/alglibengine'
4
+ require 'statsample/regression/multiple/rubyengine'
5
+
1
6
  module Statsample
2
- # module for regression methods
7
+ # Module for regression procedures
3
8
  module Regression
4
- # Class for calculation of linear regressions
5
- # To create a SimpleRegression object:
6
- # * <tt> SimpleRegression.new_from_vectors(vx,vy)</tt>
7
- # * <tt> SimpleRegression.new_from_gsl(gsl) </tt>
8
- #
9
- class SimpleRegression
10
- attr_accessor :a,:b,:cov00, :cov01, :covx1, :chisq, :status
11
- private_class_method :new
12
- def initialize(init_method, *argv)
13
- self.send(init_method, *argv)
14
- end
15
- def y(val_x)
16
- @a+@b*val_x
17
- end
18
- def x(val_y)
19
- (val_y-@a) / @b.to_f
20
- end
21
- # Sum of square error
22
- def sse
23
- (0...@vx.size).inject(0) {|acum,i|
24
- acum+((@vy[i]-y(@vx[i]))**2)
25
- }
26
- end
27
- def standard_error
28
- Math::sqrt(sse / (@vx.size-2).to_f)
29
- end
30
- # Sum of square regression
31
- def ssr
32
- vy_mean=@vy.mean
33
- (0...@vx.size).inject(0) {|a,i|
34
- a+((y(@vx[i])-vy_mean)**2)
35
- }
36
-
37
- end
38
- # Sum of square total
39
- def sst
40
- @vy.sum_of_squared_deviation
41
- end
42
- # Value of r
43
- def r
44
- @b * (@vx.sds / @vy.sds)
45
- end
46
- # Value of r^2
47
- def r2
48
- r**2
49
- end
50
- class << self
51
- def new_from_gsl(ar)
52
- new(:init_gsl, *ar)
53
- end
54
- def new_from_vectors(vx,vy)
55
- new(:init_vectors,vx,vy)
56
- end
57
- end
58
- def init_vectors(vx,vy)
59
- @vx,@vy=Statsample.only_valid(vx,vy)
60
- x_m=@vx.mean
61
- y_m=@vy.mean
62
- num=den=0
63
- (0...@vx.size).each {|i|
64
- num+=(@vx[i]-x_m)*(@vy[i]-y_m)
65
- den+=(@vx[i]-x_m)**2
66
- }
67
- @b=num.to_f/den
68
- @a=y_m - @b*x_m
69
- end
70
- def init_gsl(a,b,cov00, cov01, covx1, chisq, status)
71
- @a=a
72
- @b=b
73
- @cov00=cov00
74
- @cov01=cov01
75
- @covx1=covx1
76
- @chisq=chisq
77
- @status=status
78
- end
79
- end
80
-
81
-
82
- class MultipleRegressionBase
83
- def initialize(ds,y_var)
84
- @ds=ds
85
- @y_var=y_var
86
- @r2=nil
87
-
88
- end
89
- def assign_names(c)
90
- a={}
91
- @fields.each_index {|i|
92
- a[@fields[i]]=c[i]
93
- }
94
- a
95
- end
96
- def predicted
97
- (0...@ds.cases).collect { |i|
98
- invalid=false
99
- vect=@dep_columns.collect {|v| invalid=true if v[i].nil?; v[i]}
100
- if invalid
101
- nil
102
- else
103
- process(vect)
104
- end
105
- }.to_vector(:scale)
106
- end
107
- def standarized_predicted
108
- predicted.standarized
109
- end
110
- def residuals
111
- (0...@ds.cases).collect{|i|
112
- invalid=false
113
- vect=@dep_columns.collect{|v| invalid=true if v[i].nil?; v[i]}
114
- if invalid or @ds[@y_var][i].nil?
115
- nil
116
- else
117
- @ds[@y_var][i] - process(vect)
118
- end
119
- }.to_vector(:scale)
120
- end
121
- def r
122
- raise "You should implement this"
123
- end
124
- def sst
125
- raise "You should implement this"
126
- end
127
- def ssr
128
- r2*sst
129
- end
130
- def sse
131
- sst - ssr
132
- end
133
-
134
- def coeffs_t
135
- out={}
136
- se=coeffs_se
137
- coeffs.each{|k,v|
138
- out[k]=v / se[k]
139
- }
140
- out
141
- end
142
-
143
- def mse
144
- sse/df_e
145
- end
146
-
147
- def df_r
148
- @dep_columns.size
149
- end
150
- def df_e
151
- @ds_valid.cases-@dep_columns.size-1
152
- end
153
- def f
154
- (ssr.quo(df_r)).quo(sse.quo(df_e))
155
- end
156
- # Significance of Fisher
157
- def significance
158
- if HAS_GSL
159
- GSL::Cdf.fdist_Q(f,df_r,df_e)
160
- else
161
- raise "Need Ruby/GSL"
162
- end
163
- end
164
- # Tolerance for a given variable
165
- # http://talkstats.com/showthread.php?t=5056
166
- def tolerance(var)
167
- ds=assign_names(@dep_columns)
168
- ds.each{|k,v|
169
- ds[k]=v.to_vector(:scale)
170
- }
171
- if HAS_ALGIB
172
- lr_class=::Statsample::Regression::MultipleRegressionAlglib
173
- ds=ds.to_dataset
174
- else
175
- lr_class=MultipleRegressionPairwise
176
- ds=ds.to_dataset.dup_only_valid
177
- end
178
- lr=lr_class.new(ds,var)
179
- 1-lr.r2
180
- end
181
- def coeffs_tolerances
182
- @fields.inject({}) {|a,f|
183
- a[f]=tolerance(f);
184
- a
185
- }
186
- end
187
- def coeffs_se
188
- out={}
189
- mse=sse.quo(df_e)
190
- coeffs.each {|k,v|
191
- out[k]=Math::sqrt(mse/(@ds[k].sum_of_squares*tolerance(k)))
192
- }
193
- out
194
- end
195
- def estimated_variance_covariance_matrix
196
- mse_p=mse
197
- columns=[]
198
- @ds_valid.each_vector{|k,v|
199
- columns.push(v.data) unless k==@y_var
200
- }
201
- columns.unshift([1.0]*@ds_valid.cases)
202
- x=Matrix.columns(columns)
203
- matrix=((x.t*x)).inverse * mse
204
- matrix.collect {|i|
205
-
206
- Math::sqrt(i) if i>0
207
- }
208
- end
209
- def constant_t
210
- constant.to_f/constant_se
211
- end
212
- def constant_se
213
- estimated_variance_covariance_matrix[0,0]
214
- end
215
- def summary(report_type=ConsoleSummary)
216
- c=coeffs
217
- out=""
218
- out.extend report_type
219
- out.add <<HEREDOC
220
- Summary for regression of #{@fields.join(',')} over #{@y_var}"
221
- *************************************************************
222
- Cases(listwise)=#{@ds.cases}(#{@ds_valid.cases})
223
- r=#{sprintf("%0.3f",r)}
224
- r2=#{sprintf("%0.3f",r2)}
225
- ssr=#{sprintf("%0.3f",ssr)}
226
- sse=#{sprintf("%0.3f",sse)}
227
- sst=#{sprintf("%0.3f",sst)}
228
- F#{sprintf("(%d,%d)=%0.3f, p=%0.3f",df_r,df_e,f,significance)}
229
- Equation=#{sprintf("%0.3f",constant)}+#{@fields.collect {|k| sprintf("%0.3f%s",c[k],k)}.join(' + ')}
230
-
231
- HEREDOC
232
-
233
- end
234
-
235
-
236
- # Deprecated
237
- # Sum of squares of error (manual calculation)
238
- # using the predicted value minus the y_i value
239
- def sse_manual
240
- pr=predicted
241
- cases=0
242
- sse=(0...@ds.cases).inject(0) {|a,i|
243
- if !@dy.data_with_nils[i].nil? and !pr[i].nil?
244
- cases+=1
245
- a+((pr[i]-@dy[i])**2)
246
- else
247
- a
248
- end
249
- }
250
- sse*(min_n_valid-1.0).quo(cases-1)
251
- end
252
- # Sum of squares of regression
253
- # using the predicted value minus y mean
254
- def ssr_direct
255
- mean=@dy.mean
256
- cases=0
257
- ssr=(0...@ds.cases).inject(0) {|a,i|
258
- invalid=false
259
- v=@dep_columns.collect{|c| invalid=true if c[i].nil?; c[i]}
260
- if !invalid
261
- cases+=1
262
- a+((process(v)-mean)**2)
263
- else
264
- a
265
- end
266
- }
267
- ssr
268
- end
269
- def sse_direct
270
- sst-ssr
271
- end
272
-
273
-
274
- end
275
-
276
-
277
-
278
-
279
-
280
- if HAS_ALGIB
281
- # Class for calculation of multiple regression.
282
- # Requires Alglib gem.
283
- # To create a SimpleRegression object:
284
- # @a=[1,3,2,4,3,5,4,6,5,7].to_vector(:scale)
285
- # @b=[3,3,4,4,5,5,6,6,4,4].to_vector(:scale)
286
- # @c=[11,22,30,40,50,65,78,79,99,100].to_vector(:scale)
287
- # @y=[3,4,5,6,7,8,9,10,20,30].to_vector(:scale)
288
- # ds={'a'=>@a,'b'=>@b,'c'=>@c,'y'=>@y}.to_dataset
289
- # lr=Statsample::Regression::MultipleRegression.new(ds,'y')
290
- #
291
- class MultipleRegressionAlglib < MultipleRegressionBase
292
- def initialize(ds,y_var)
293
- @ds=ds.dup_only_valid
294
- @ds_valid=@ds
295
- @y_var=y_var
296
- @dy=@ds[@y_var]
297
- @ds_indep=ds.dup(ds.fields-[y_var])
298
- # Create a custom matrix
299
- columns=[]
300
- @fields=[]
301
- @ds.fields.each{|f|
302
- if f!=@y_var
303
- columns.push(@ds[f].to_a)
304
- @fields.push(f)
305
- end
306
- }
307
- @dep_columns=columns.dup
308
- columns.push(@ds[@y_var])
309
- matrix=Matrix.columns(columns)
310
- @lr_s=nil
311
- @lr=::Alglib::LinearRegression.build_from_matrix(matrix)
312
- end
313
-
314
- def _dump(i)
315
- Marshal.dump({'ds'=>@ds,'y_var'=>@y_var})
316
- end
317
- def self._load(data)
318
- h=Marshal.load(data)
319
- MultipleRegression.new(h['ds'], h['y_var'])
320
- end
321
-
322
- def coeffs
323
- assign_names(@lr.coeffs)
324
- end
325
- # Coefficients using a constant
326
- # Based on http://www.xycoon.com/ols1.htm
327
- def matrix_resolution
328
- mse_p=mse
329
- columns=@dep_columns.dup.map {|xi| xi.map{|i| i.to_f}}
330
- columns.unshift([1.0]*@ds.cases)
331
- y=Matrix.columns([@dy.data.map {|i| i.to_f}])
332
- x=Matrix.columns(columns)
333
- xt=x.t
334
- matrix=((xt*x)).inverse*xt
335
- matrix*y
336
- end
337
- def r2
338
- r**2
339
- end
340
- def r
341
- Bivariate::pearson(@dy,predicted)
342
- end
343
- def sst
344
- @dy.ss
345
- end
346
- def constant
347
- @lr.constant
348
- end
349
- def standarized_coeffs
350
- l=lr_s
351
- assign_names(l.coeffs)
352
- end
353
- def lr_s
354
- if @lr_s.nil?
355
- build_standarized
356
- end
357
- @lr_s
358
- end
359
- def build_standarized
360
- @ds_s=@ds.standarize
361
- columns=[]
362
- @ds_s.fields.each{|f|
363
- columns.push(@ds_s[f].to_a) unless f==@y_var
364
- }
365
- @dep_columns_s=columns.dup
366
- columns.push(@ds_s[@y_var])
367
- matrix=Matrix.columns(columns)
368
- @lr_s=Alglib::LinearRegression.build_from_matrix(matrix)
369
- end
370
- def process(v)
371
- @lr.process(v)
372
- end
373
- def process_s(v)
374
- lr_s.process(v)
375
- end
376
- # ???? Not equal to SPSS output
377
- def standarized_residuals
378
- res=residuals
379
- red_sd=residuals.sds
380
- res.collect {|v|
381
- v.quo(red_sd)
382
- }.to_vector(:scale)
383
- end
384
- end
385
- end
386
-
387
-
388
-
389
-
390
-
391
-
392
-
393
-
394
-
395
-
396
-
397
-
398
- class MultipleRegressionPairwise < MultipleRegressionBase
399
- def initialize(ds,y_var)
400
- super
401
- @dy=ds[@y_var]
402
- @ds_valid=ds.dup_only_valid
403
- @ds_indep=ds.dup(ds.fields-[y_var])
404
- @fields=@ds_indep.fields
405
- set_dep_columns
406
- obtain_y_vector
407
- @matrix_x = Bivariate.correlation_matrix(@ds_indep)
408
- @coeffs_stan=(@matrix_x.inverse * @matrix_y).column(0).to_a
409
- @min_n_valid=nil
410
- end
411
- def min_n_valid
412
- if @min_n_valid.nil?
413
- min=@ds.cases
414
- m=Bivariate::n_valid_matrix(@ds)
415
- for x in 0...m.row_size
416
- for y in 0...m.column_size
417
- min=m[x,y] if m[x,y] < min
418
- end
419
- end
420
- @min_n_valid=min
421
- end
422
- @min_n_valid
423
- end
424
- def set_dep_columns
425
- @dep_columns=[]
426
- @ds_indep.each_vector{|k,v|
427
- @dep_columns.push(v.data_with_nils)
428
- }
429
- end
430
- # Sum of square total
431
- def sst
432
- #if @sst.nil?
433
- @sst=@dy.variance*(min_n_valid-1.0)
434
- #end
435
- @sst
436
- end
437
- def r2
438
- if @r2.nil?
439
- c=@matrix_y
440
- rxx=obtain_predictor_matrix
441
- matrix=(c.t*rxx.inverse*c)
442
- @r2=matrix[0,0]
443
- end
444
- @r2
445
- end
446
- def r
447
- Math::sqrt(r2)
448
- end
449
-
450
- def df_e
451
- min_n_valid-@dep_columns.size-1
452
- end
453
- def fix_with_mean
454
- i=0
455
- @ds_indep.each{|row|
456
- empty=[]
457
- row.each{|k,v|
458
- empty.push(k) if v.nil?
459
- }
460
- if empty.size==1
461
- @ds_indep[empty[0]][i]=@ds[empty[0]].mean
462
- end
463
- i+=1
464
- }
465
- @ds_indep.update_valid_data
466
- set_dep_columns
467
- end
468
- def fix_with_regression
469
- i=0
470
- @ds_indep.each{|row|
471
- empty=[]
472
- row.each{|k,v|
473
- empty.push(k) if v.nil?
474
- }
475
- if empty.size==1
476
- field=empty[0]
477
- lr=MultipleRegression.new(@ds_indep,field)
478
- fields=[]
479
- @ds_indep.fields.each{|f|
480
- fields.push(row[f]) unless f==field
481
- }
482
- @ds_indep[field][i]=lr.process(fields)
483
- end
484
- i+=1
485
- }
486
- @ds_indep.update_valid_data
487
- set_dep_columns
488
- end
489
- def obtain_y_vector
490
- @matrix_y=Matrix.columns([@ds_indep.fields.collect{|f|
491
- Bivariate.pearson(@dy, @ds_indep[f])
492
- }])
493
- end
494
- def obtain_predictor_matrix
495
- Bivariate::correlation_matrix(@ds_indep)
496
- end
497
- def constant
498
- c=coeffs
499
- @dy.mean-@fields.inject(0){|a,k| a+(c[k] * @ds_indep[k].mean)}
500
- end
501
- def process(v)
502
- c=coeffs
503
- total=constant
504
- @fields.each_index{|i|
505
- total+=c[@fields[i]]*v[i]
506
- }
507
- total
508
- end
509
- def coeffs
510
- sc=standarized_coeffs
511
- assign_names(@fields.collect{|f|
512
- (sc[f]*@dy.sds).quo(@ds_indep[f].sds)
513
- })
514
- end
515
- def standarized_coeffs
516
- assign_names(@coeffs_stan)
517
- end
518
- end
519
-
520
-
521
9
  end
522
10
  end