statsample 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +5 -0
- data/Manifest.txt +13 -2
- data/demo/benchmark.rb +1 -1
- data/demo/crosstab.rb +7 -0
- data/demo/nunnally_6.rb +34 -0
- data/demo/proportion.rb +1 -1
- data/demo/regression.rb +46 -0
- data/demo/t-student.rb +17 -0
- data/lib/statsample.rb +3 -4
- data/lib/statsample/crosstab.rb +34 -1
- data/lib/statsample/dominanceanalysis.rb +2 -2
- data/lib/statsample/dominanceanalysis/bootstrap.rb +2 -1
- data/lib/statsample/regression.rb +6 -518
- data/lib/statsample/regression/multiple.rb +259 -0
- data/lib/statsample/regression/multiple/alglibengine.rb +117 -0
- data/lib/statsample/regression/multiple/rubyengine.rb +140 -0
- data/lib/statsample/regression/simple.rb +81 -0
- data/test/test_regression.rb +5 -5
- data/test/test_statistics.rb +2 -12
- data/test/test_xls.xls +0 -0
- metadata +14 -3
@@ -0,0 +1,259 @@
|
|
1
|
+
module Statsample
|
2
|
+
module Regression
|
3
|
+
# Module for Multiple Regression Analysis
|
4
|
+
# You can call Regression::Multiple.listwise or Regression::Multiple.pairwise, or instantiate the engines directly.
|
5
|
+
# Example.
|
6
|
+
#
|
7
|
+
# require 'statsample'
|
8
|
+
# @a=[1,3,2,4,3,5,4,6,5,7].to_vector(:scale)
|
9
|
+
# @b=[3,3,4,4,5,5,6,6,4,4].to_vector(:scale)
|
10
|
+
# @c=[11,22,30,40,50,65,78,79,99,100].to_vector(:scale)
|
11
|
+
# @y=[3,4,5,6,7,8,9,10,20,30].to_vector(:scale)
|
12
|
+
# ds={'a'=>@a,'b'=>@b,'c'=>@c,'y'=>@y}.to_dataset
|
13
|
+
# lr=Statsample::Regression::Multiple.listwise(ds,'y')
|
14
|
+
# #<Statsample::Regression::Multiple::AlglibEngine:0x7f21912e4758 @ds_valid=#<Statsample::Dataset:69891073182680 @fields=[a,b,c,y] labels={"a"=>nil, "b"=>nil, "y"=>nil, "c"=>nil} cases=10, @lr=#<Alglib::LinearRegression:0x7f21912df118 @model=#<Alglib_ext::LinearModel:0x7f21912df708>, @ivars=3, @cases=10, @report=#<Alglib_ext::LrReport:0x7f21912df168>>, @y_var="y", @ds=#<Statsample::Dataset:69891073182680 @fields=[a,b,c,y] labels={"a"=>nil, "b"=>nil, "y"=>nil, "c"=>nil} cases=10, @fields=["a", "b", "c"], @lr_s=nil, @dep_columns=[[1, 3, 2, 4, 3, 5, 4, 6, 5, 7], [3, 3, 4, 4, 5, 5, 6, 6, 4, 4], [11, 22, 30, 40, 50, 65, 78, 79, 99, 100]], @ds_indep=#<Statsample::Dataset:69891073180060 @fields=[a,b,c] labels={"a"=>nil, "b"=>nil, "c"=>nil} cases=10, @dy=Vector(type:scale, n:10)[3,4,5,6,7,8,9,10,20,30]>
|
15
|
+
|
16
|
+
|
17
|
+
module Multiple
|
18
|
+
# Creates an object for listwise regression. According to resources
|
19
|
+
# select the best engine
|
20
|
+
# lr=Statsample::Regression::Multiple.listwise(ds,'y')
|
21
|
+
# Picks the engine for a listwise regression of +y_var+ on the remaining
# fields of +ds+: AlglibEngine when HAS_ALGIB is set, otherwise a
# RubyEngine fed with a copy of +ds+ reduced by dup_only_valid.
def self.listwise(ds, y_var)
  return AlglibEngine.new(ds, y_var) if HAS_ALGIB

  only_valid = ds.dup_only_valid
  RubyEngine.new(only_valid, y_var)
end
|
29
|
+
|
30
|
+
# Creates an object for pairwise regression
|
31
|
+
# For now, always retrieves a RubyEngine
|
32
|
+
# lr=Statsample::Regression::Multiple.pairwise(ds,'y')
|
33
|
+
# Builds the engine for a pairwise regression of +y_var+ on the remaining
# fields of +ds+. The dataset is handed over untouched (no prior removal
# of incomplete cases) and the engine is always a RubyEngine.
def self.pairwise(ds, y_var)
  engine_class = RubyEngine
  engine_class.new(ds, y_var)
end
|
36
|
+
|
37
|
+
# Base class for Multiple Regression Engines
|
38
|
+
class BaseEngine
  # This class only stores @ds, @y_var and @r2. The methods below also read
  # @dep_columns, @fields, @ds_valid and @dy, and call +r2+, +coeffs+,
  # +constant+ and +process+; none of those are provided here, so every
  # concrete engine has to supply them.

  # ds    :: dataset with the predictors and the dependent variable
  # y_var :: name of the dependent variable inside +ds+
  def initialize(ds,y_var)
    @ds=ds
    @y_var=y_var
    @r2=nil
  end

  # Retrieves a vector with predicted values for y.
  # A case with a nil on any predictor column gets nil.
  def predicted
    (0...@ds.cases).collect { |i|
      row=predictor_row(i)
      row.nil? ? nil : process(row)
    }.to_vector(:scale)
  end

  # Retrieves a vector with standarized values for y
  def standarized_predicted
    predicted.standarized
  end

  # Retrieves a vector with residuals values for y
  # (observed minus predicted). A case with a nil on any predictor
  # column or on y gets nil.
  def residuals
    (0...@ds.cases).collect { |i|
      row=predictor_row(i)
      if row.nil? or @ds[@y_var][i].nil?
        nil
      else
        @ds[@y_var][i] - process(row)
      end
    }.to_vector(:scale)
  end

  # R Multiple. Must be provided by the concrete engine.
  def r
    raise "You should implement this"
  end

  # Sum of squares Total. Must be provided by the concrete engine.
  def sst
    raise "You should implement this"
  end

  # Sum of squares (regression): r2 * sst
  def ssr
    r2*sst
  end

  # Sum of squares (Error): sst - ssr
  def sse
    sst - ssr
  end

  # T values for coeffs: each coefficient divided by its standard error.
  # Returns a hash keyed like +coeffs+.
  def coeffs_t
    se=coeffs_se
    out={}
    coeffs.each { |name,value| out[name]=value / se[name] }
    out
  end

  # Mean square Regression
  def msr
    ssr.quo(df_r)
  end

  # Mean Square Error
  def mse
    sse.quo(df_e)
  end

  # Degrees of freedom for regression (number of predictor columns)
  def df_r
    @dep_columns.size
  end

  # Degrees of freedom for error
  def df_e
    @ds_valid.cases-@dep_columns.size-1
  end

  # Fisher for Anova: mean square regression over mean square error
  def f
    msr.quo(mse)
  end

  # Significance of Fisher.
  # Raises a RuntimeError when Ruby/GSL is not available.
  def significance
    if HAS_GSL
      GSL::Cdf.fdist_Q(f,df_r,df_e)
    else
      raise "Need Ruby/GSL"
    end
  end

  # Tolerance for a given variable
  # http://talkstats.com/showthread.php?t=5056
  #
  # Regresses +var+ on the other predictors and returns 1 - r2 of that
  # regression. The engine is chosen the same way as for a listwise run.
  def tolerance(var)
    # Build a fresh hash instead of reassigning entries of the hash
    # that is being iterated.
    vectors={}
    assign_names(@dep_columns).each { |name,column|
      vectors[name]=column.to_vector(:scale)
    }
    dataset=vectors.to_dataset
    if HAS_ALGIB
      engine_class=AlglibEngine
    else
      engine_class=RubyEngine
      dataset=dataset.dup_only_valid
    end
    1-engine_class.new(dataset,var).r2
  end

  # Tolerances for each coefficient, as a hash keyed by field name
  def coeffs_tolerances
    @fields.inject({}) { |acc,field|
      acc[field]=tolerance(field)
      acc
    }
  end

  # Standard Error for coefficients, as a hash keyed like +coeffs+
  def coeffs_se
    # Computed once up front; the value is the same for every coefficient.
    error_ms=mse
    out={}
    coeffs.each_key { |name|
      out[name]=Math::sqrt(error_ms/(@ds[name].sum_of_squares*tolerance(name)))
    }
    out
  end

  # Estimated Variance-Covariance Matrix
  # Used for calculation of se of constant
  #
  # What is returned is (X'X)^-1 * mse with every positive entry replaced
  # by its square root and every other entry replaced by nil. X is the
  # matrix of valid predictor columns with a leading column of 1.0.
  def estimated_variance_covariance_matrix
    error_ms=mse
    columns=[]
    @ds_valid.each_vector { |name,vector|
      columns.push(vector.data) unless name==@y_var
    }
    columns.unshift([1.0]*@ds_valid.cases)
    x=Matrix.columns(columns)
    scaled=(x.t*x).inverse * error_ms
    scaled.collect { |value| Math::sqrt(value) if value>0 }
  end

  # T for constant
  def constant_t
    constant.to_f/constant_se
  end

  # Standard error for constant
  def constant_se
    estimated_variance_covariance_matrix[0,0]
  end

  # Retrieves a summary for Regression.
  # +report_type+ is a module that is mixed into the output string and
  # has to provide +add+, +add_line+ and +parse_table+.
  def summary(report_type=ConsoleSummary)
    c=coeffs
    out=""
    out.extend report_type
    out.add <<HEREDOC
Summary for regression of #{@fields.join(',')} over #{@y_var}
*************************************************************
Engine: #{self.class}
Cases(listwise)=#{@ds.cases}(#{@ds_valid.cases})
r=#{sprintf("%0.3f",r)}
r2=#{sprintf("%0.3f",r2)}
Equation=#{sprintf("%0.3f",constant)}+#{@fields.collect {|k| sprintf("%0.3f%s",c[k],k)}.join(' + ')}
HEREDOC

    out.add_line
    out.add "ANOVA TABLE"

    t=Statsample::ReportTable.new(%w{source ss df ms f s})
    t.add_row(["Regression", sprintf("%0.3f",ssr), df_r, sprintf("%0.3f",msr), sprintf("%0.3f",f), sprintf("%0.3f",significance)])
    t.add_row(["Error", sprintf("%0.3f",sse), df_e, sprintf("%0.3f",mse)])
    t.add_row(["Total", sprintf("%0.3f",sst), df_r+df_e])

    out.parse_table(t)
    out
  end

  # Pairs each entry of +c+ with the field name at the same position in
  # @fields and returns the result as a hash.
  def assign_names(c)
    named={}
    @fields.each_index { |i| named[@fields[i]]=c[i] }
    named
  end

  # Deprecated
  # Sum of squares of error (manual calculation)
  # using the predicted value minus the y_i value
  #
  # Relies on +min_n_valid+, which this class does not define.
  def sse_manual
    pr=predicted
    cases=0
    total=(0...@ds.cases).inject(0) { |acc,i|
      if !@dy.data_with_nils[i].nil? and !pr[i].nil?
        cases+=1
        acc+((pr[i]-@dy[i])**2)
      else
        acc
      end
    }
    total*(min_n_valid-1.0).quo(cases-1)
  end

  # Sum of squares of regression
  # using the predicted value minus y mean
  # Cases with a nil on any predictor column are skipped.
  def ssr_direct
    mean=@dy.mean
    (0...@ds.cases).inject(0) { |acc,i|
      row=predictor_row(i)
      row.nil? ? acc : acc+((process(row)-mean)**2)
    }
  end

  # NOTE(review): identical to +sse+ (uses +ssr+, not +ssr_direct+) -
  # confirm whether that is intended.
  def sse_direct
    sst-ssr
  end

  # Values of every predictor column for case +i+, in column order, or
  # nil when at least one of them is nil.
  def predictor_row(i)
    row=@dep_columns.collect { |column| column[i] }
    row.any? { |value| value.nil? } ? nil : row
  end
  private :predictor_row
end
|
257
|
+
end
|
258
|
+
end
|
259
|
+
end
|
@@ -0,0 +1,117 @@
|
|
1
|
+
if HAS_ALGIB
|
2
|
+
module Statsample
|
3
|
+
module Regression
|
4
|
+
module Multiple
|
5
|
+
# Class for Multiple Regression Analysis
|
6
|
+
# Requires the Alglib gem and uses a listwise approach.
|
7
|
+
# If you need pairwise, use RubyEngine
|
8
|
+
# Example:
|
9
|
+
#
|
10
|
+
# @a=[1,3,2,4,3,5,4,6,5,7].to_vector(:scale)
|
11
|
+
# @b=[3,3,4,4,5,5,6,6,4,4].to_vector(:scale)
|
12
|
+
# @c=[11,22,30,40,50,65,78,79,99,100].to_vector(:scale)
|
13
|
+
# @y=[3,4,5,6,7,8,9,10,20,30].to_vector(:scale)
|
14
|
+
# ds={'a'=>@a,'b'=>@b,'c'=>@c,'y'=>@y}.to_dataset
|
15
|
+
# lr=Statsample::Regression::Multiple::AlglibEngine.new(ds,'y')
|
16
|
+
#
|
17
|
+
class AlglibEngine < BaseEngine
  # Fits the regression of +y_var+ on every other field of +ds+, using
  # only the cases kept by +dup_only_valid+.
  def initialize(ds,y_var)
    @ds=ds.dup_only_valid
    @ds_valid=@ds
    @y_var=y_var
    @dy=@ds[@y_var]
    # NOTE(review): built from the unfiltered +ds+, unlike the rest of
    # the state set up here - confirm this is intended.
    @ds_indep=ds.dup(ds.fields-[y_var])
    # Predictor names and their data, in dataset field order
    @fields=@ds.fields.reject { |f| f==@y_var }
    @dep_columns=@fields.collect { |f| @ds[f].to_a }
    # Create a custom matrix: predictor columns followed by y
    columns=@dep_columns.dup
    columns.push(@ds[@y_var])
    @lr_s=nil
    @lr=::Alglib::LinearRegression.build_from_matrix(Matrix.columns(columns))
  end

  # Marshal support: only the dataset and the dependent variable name
  # are serialized.
  def _dump(i)
    Marshal.dump({'ds'=>@ds,'y_var'=>@y_var})
  end

  # Marshal support: rebuilds the engine (and refits the model) from the
  # data written by +_dump+.
  def self._load(data)
    h=Marshal.load(data)
    self.new(h['ds'], h['y_var'])
  end

  # Coefficients of the fitted model, as a hash keyed by field name
  def coeffs
    assign_names(@lr.coeffs)
  end

  # Coefficients using a constant
  # Based on http://www.xycoon.com/ols1.htm
  #
  # Returns the column matrix (X'X)^-1 X' y, where X is the predictor
  # matrix with a leading column of 1.0.
  def matrix_resolution
    columns=@dep_columns.map { |xi| xi.map { |i| i.to_f } }
    columns.unshift([1.0]*@ds.cases)
    y=Matrix.columns([@dy.data.map { |i| i.to_f }])
    x=Matrix.columns(columns)
    xt=x.t
    (xt*x).inverse*xt*y
  end

  # Squared multiple correlation
  def r2
    r**2
  end

  # Multiple correlation: Pearson correlation between y and its
  # predicted values
  def r
    Bivariate::pearson(@dy,predicted)
  end

  # Sum of squares total
  def sst
    @dy.ss
  end

  # Constant of the fitted model
  def constant
    @lr.constant
  end

  # Coefficients of the model fitted on the standarized dataset, as a
  # hash keyed by field name
  def standarized_coeffs
    assign_names(lr_s.coeffs)
  end

  # Model fitted on the standarized dataset; built on first use
  def lr_s
    build_standarized if @lr_s.nil?
    @lr_s
  end

  # Standarizes the dataset and fits a second model on it, stored in @lr_s
  def build_standarized
    @ds_s=@ds.standarize
    predictors=@ds_s.fields.reject { |f| f==@y_var }
    @dep_columns_s=predictors.collect { |f| @ds_s[f].to_a }
    columns=@dep_columns_s.dup
    columns.push(@ds_s[@y_var])
    @lr_s=Alglib::LinearRegression.build_from_matrix(Matrix.columns(columns))
  end

  # Predicted y for one array of predictor values
  def process(v)
    @lr.process(v)
  end

  # Same as +process+, using the model fitted on the standarized dataset
  def process_s(v)
    lr_s.process(v)
  end

  # ???? Not equal to SPSS output
  #
  # Each residual divided by the +sds+ of the residuals.
  def standarized_residuals
    # Compute the residuals once and reuse them for both steps
    res=residuals
    res_sd=res.sds
    res.collect { |v| v.quo(res_sd) }.to_vector(:scale)
  end
end
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end # for Statsample
|
114
|
+
end # for if
|
115
|
+
|
116
|
+
|
117
|
+
|
@@ -0,0 +1,140 @@
|
|
1
|
+
module Statsample
|
2
|
+
module Regression
|
3
|
+
module Multiple
|
4
|
+
# Pure Ruby Class for Multiple Regression Analysis.
|
5
|
+
# Slower than AlglibEngine, but it is pure Ruby and uses a pairwise approach for missing values.
|
6
|
+
# If you need a listwise approach for missing values, use AlglibEngine, because it is faster.
|
7
|
+
#
|
8
|
+
# Example:
|
9
|
+
#
|
10
|
+
# @a=[1,3,2,4,3,5,4,6,5,7].to_vector(:scale)
|
11
|
+
# @b=[3,3,4,4,5,5,6,6,4,4].to_vector(:scale)
|
12
|
+
# @c=[11,22,30,40,50,65,78,79,99,100].to_vector(:scale)
|
13
|
+
# @y=[3,4,5,6,7,8,9,10,20,30].to_vector(:scale)
|
14
|
+
# ds={'a'=>@a,'b'=>@b,'c'=>@c,'y'=>@y}.to_dataset
|
15
|
+
# lr=Statsample::Regression::Multiple::RubyEngine.new(ds,'y')
|
16
|
+
|
17
|
+
class RubyEngine < BaseEngine
  # Computes the standarized coefficients of the regression of +y_var+
  # on every other field of +ds+ from the predictor correlation matrix
  # and the predictor/y correlations.
  def initialize(ds,y_var)
    super
    @dy=ds[@y_var]
    @ds_valid=ds.dup_only_valid
    @ds_indep=ds.dup(ds.fields-[y_var])
    @fields=@ds_indep.fields
    set_dep_columns
    obtain_y_vector
    @matrix_x = Bivariate.correlation_matrix(@ds_indep)
    @coeffs_stan=(@matrix_x.inverse * @matrix_y).column(0).to_a
    @min_n_valid=nil
  end

  # Smallest entry of Bivariate::n_valid_matrix for the dataset, capped
  # at the number of cases. Computed on first use and then cached.
  def min_n_valid
    if @min_n_valid.nil?
      min=@ds.cases
      m=Bivariate::n_valid_matrix(@ds)
      (0...m.row_size).each { |x|
        (0...m.column_size).each { |y|
          min=m[x,y] if m[x,y] < min
        }
      }
      @min_n_valid=min
    end
    @min_n_valid
  end

  # Refreshes @dep_columns from the predictor dataset, nils included
  def set_dep_columns
    @dep_columns=[]
    @ds_indep.each_vector { |k,v|
      @dep_columns.push(v.data_with_nils)
    }
  end

  # Sum of square total: variance of y times (min_n_valid - 1).
  # Recomputed on every call.
  def sst
    @sst=@dy.variance*(min_n_valid-1.0)
    @sst
  end

  # Squared multiple correlation, c' * Rxx^-1 * c, where c holds the
  # predictor/y correlations and Rxx the predictor correlation matrix.
  # Cached after the first call.
  def r2
    if @r2.nil?
      c=@matrix_y
      rxx=obtain_predictor_matrix
      @r2=(c.t*rxx.inverse*c)[0,0]
    end
    @r2
  end

  # Multiple correlation
  def r
    Math::sqrt(r2)
  end

  # Degrees of freedom for error, based on +min_n_valid+
  def df_e
    min_n_valid-@dep_columns.size-1
  end

  # For every case with exactly one missing predictor, replaces the
  # missing value with the mean of that field.
  def fix_with_mean
    fill_single_missing { |field,row| @ds[field].mean }
  end

  # For every case with exactly one missing predictor, replaces the
  # missing value with the value predicted from the other predictors.
  def fix_with_regression
    fill_single_missing { |field,row|
      # The engine class of this file; the constant previously used here
      # is not defined anywhere in this file.
      lr=RubyEngine.new(@ds_indep,field)
      known=[]
      @ds_indep.fields.each { |f|
        known.push(row[f]) unless f==field
      }
      lr.process(known)
    }
  end

  # Builds @matrix_y: a one-column matrix with the Pearson correlation
  # between y and each predictor
  def obtain_y_vector
    @matrix_y=Matrix.columns([@ds_indep.fields.collect { |f|
      Bivariate.pearson(@dy, @ds_indep[f])
    }])
  end

  # Correlation matrix of the predictors
  def obtain_predictor_matrix
    Bivariate::correlation_matrix(@ds_indep)
  end

  # Constant of the equation: mean of y minus the sum of each
  # coefficient times the mean of its predictor
  def constant
    c=coeffs
    @dy.mean-@fields.inject(0) { |a,k| a+(c[k] * @ds_indep[k].mean) }
  end

  # Predicted y for one array of predictor values, given in @fields order
  def process(v)
    c=coeffs
    (0...@fields.size).inject(constant) { |total,i|
      total+c[@fields[i]]*v[i]
    }
  end

  # Unstandarized coefficients, as a hash keyed by field name:
  # standarized coefficient * sds(y) / sds(predictor)
  def coeffs
    sc=standarized_coeffs
    assign_names(@fields.collect { |f|
      (sc[f]*@dy.sds).quo(@ds_indep[f].sds)
    })
  end

  # Standarized coefficients, as a hash keyed by field name
  def standarized_coeffs
    assign_names(@coeffs_stan)
  end

  # Walks the predictor dataset case by case. When a case has exactly one
  # nil, yields the name of that field and the row, and stores the block
  # result in its place. Afterwards refreshes the valid data of the
  # dataset and @dep_columns.
  def fill_single_missing
    i=0
    @ds_indep.each { |row|
      empty=[]
      row.each { |k,v|
        empty.push(k) if v.nil?
      }
      if empty.size==1
        @ds_indep[empty[0]][i]=yield(empty[0],row)
      end
      i+=1
    }
    @ds_indep.update_valid_data
    set_dep_columns
  end
  private :fill_single_missing
end
|
138
|
+
end
|
139
|
+
end
|
140
|
+
end
|