statsample 0.6.3 → 0.6.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +6 -0
- data/Manifest.txt +4 -0
- data/README.txt +5 -5
- data/demo/dominance_analysis_bootstrap.rb +9 -3
- data/demo/dominanceanalysis.rb +23 -7
- data/demo/multivariate_correlation.rb +26 -0
- data/lib/statsample.rb +1 -1
- data/lib/statsample/bivariate.rb +24 -4
- data/lib/statsample/bivariate/polychoric.rb +15 -14
- data/lib/statsample/converters.rb +27 -23
- data/lib/statsample/crosstab.rb +1 -44
- data/lib/statsample/dominanceanalysis.rb +158 -64
- data/lib/statsample/dominanceanalysis/bootstrap.rb +16 -7
- data/lib/statsample/matrix.rb +145 -13
- data/lib/statsample/multiset.rb +248 -265
- data/lib/statsample/regression.rb +3 -0
- data/lib/statsample/regression/multiple.rb +65 -23
- data/lib/statsample/regression/multiple/baseengine.rb +19 -20
- data/lib/statsample/regression/multiple/matrixengine.rb +187 -0
- data/lib/statsample/regression/multiple/rubyengine.rb +58 -98
- data/test/test_bivariate.rb +1 -0
- data/test/test_crosstab.rb +0 -3
- data/test/test_dataset.rb +379 -379
- data/test/test_dominance_analysis.rb +43 -0
- data/test/test_matrix.rb +52 -0
- data/test/test_regression.rb +174 -129
- data/test/test_svg_graph.rb +51 -51
- metadata +29 -3
@@ -1,8 +1,11 @@
|
|
1
1
|
require 'statsample/regression/simple'
|
2
2
|
require 'statsample/regression/multiple'
|
3
|
+
|
4
|
+
require 'statsample/regression/multiple/matrixengine'
|
3
5
|
require 'statsample/regression/multiple/alglibengine'
|
4
6
|
require 'statsample/regression/multiple/rubyengine'
|
5
7
|
require 'statsample/regression/multiple/gslengine'
|
8
|
+
|
6
9
|
require 'statsample/regression/binomial'
|
7
10
|
require 'statsample/regression/binomial/logit'
|
8
11
|
require 'statsample/regression/binomial/probit'
|
@@ -42,35 +42,77 @@ module Statsample
|
|
42
42
|
# -----------------------------------------------
|
43
43
|
#
|
44
44
|
module Multiple
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
45
|
+
# Creates an object for listwise regression.
|
46
|
+
# Alglib is faster, so it is preferred over GSL
|
47
|
+
# lr=Statsample::Regression::Multiple.listwise(ds,'y')
|
48
|
+
def self.listwise(ds,y_var)
|
49
|
+
if HAS_ALGIB
|
50
|
+
AlglibEngine.new(ds,y_var)
|
51
|
+
elsif HAS_GSL
|
52
|
+
GslEngine.new(ds,y_var)
|
53
|
+
else
|
54
|
+
ds2=ds.dup_only_valid
|
55
|
+
RubyEngine.new(ds2,y_var)
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
# Creates an object for pairwise regression
|
60
|
+
# For now, always retrieves a RubyEngine
|
61
|
+
# lr=Statsample::Regression::Multiple.pairwise(ds,'y')
|
62
|
+
def self.pairwise(ds,y_var)
|
63
|
+
RubyEngine.new(ds,y_var)
|
64
|
+
end
|
65
|
+
def self.listwise_by_exp(ds,exp)
|
66
|
+
raise "Not implemented yet"
|
67
|
+
end
|
68
|
+
# Obtain r2 for regressors
|
69
|
+
def self.r2_from_matrices(rxx,rxy)
|
70
|
+
matrix=(rxy.transpose*rxx.inverse*rxy)
|
71
|
+
matrix[0,0]
|
72
|
+
end
|
73
|
+
|
74
|
+
class MultipleDependent
|
75
|
+
def significance
|
76
|
+
0.0
|
77
|
+
end
|
78
|
+
def initialize(matrix,y_var, opts=Hash.new)
|
79
|
+
matrix.extend Statsample::CovariateMatrix
|
80
|
+
@matrix=matrix
|
81
|
+
@fields=matrix.fields-y_var
|
82
|
+
@y_var=y_var
|
83
|
+
@q=@y_var.size
|
84
|
+
@matrix_cor=matrix.correlation
|
85
|
+
@matrix_cor_xx = @matrix_cor.submatrix(@fields)
|
86
|
+
@matrix_cor_yy = @matrix_cor.submatrix(y_var, y_var)
|
87
|
+
|
88
|
+
@sxx = @matrix.submatrix(@fields)
|
89
|
+
@syy = @matrix.submatrix(y_var, y_var)
|
90
|
+
@sxy = @matrix.submatrix(@fields, y_var)
|
91
|
+
@syx = @sxy.t
|
92
|
+
|
93
|
+
|
57
94
|
end
|
58
95
|
|
59
|
-
|
60
|
-
|
61
|
-
# lr=Statsample::Regression::Multiple.listwise(ds,'y')
|
62
|
-
def self.pairwise(ds,y_var)
|
63
|
-
RubyEngine.new(ds,y_var)
|
96
|
+
def r2yx
|
97
|
+
1- (@matrix_cor.determinant.quo(@matrix_cor_yy.determinant * @matrix_cor_xx.determinant))
|
64
98
|
end
|
65
|
-
|
66
|
-
|
99
|
+
# Residual covariance of Y after accounting for its linear relation with X
|
100
|
+
def syyx
|
101
|
+
@syy-@syx*@sxx.inverse*@sxy
|
67
102
|
end
|
68
|
-
|
69
|
-
|
70
|
-
matrix=(rxy.transpose*rxx.inverse*rxy)
|
71
|
-
matrix[0,0]
|
103
|
+
def r2yx_covariance
|
104
|
+
1-(syyx.determinant.quo(@syy.determinant))
|
72
105
|
end
|
73
106
|
|
107
|
+
def vxy
|
108
|
+
@q-(@syy.inverse*syyx).trace
|
109
|
+
end
|
110
|
+
def p2yx
|
111
|
+
vxy.quo(@q)
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
|
74
116
|
end
|
75
117
|
end
|
76
118
|
end
|
@@ -3,12 +3,21 @@ module Statsample
|
|
3
3
|
module Multiple
|
4
4
|
# Base class for Multiple Regression Engines
|
5
5
|
class BaseEngine
|
6
|
+
|
6
7
|
include GetText
|
7
8
|
bindtextdomain("statsample")
|
8
9
|
# Name of analysis
|
9
10
|
attr_accessor :name
|
11
|
+
|
12
|
+
def self.univariate?
|
13
|
+
true
|
14
|
+
end
|
15
|
+
|
16
|
+
|
17
|
+
|
10
18
|
def initialize(ds, y_var, opts = Hash.new)
|
11
19
|
@ds=ds
|
20
|
+
@cases=@ds.cases
|
12
21
|
@y_var=y_var
|
13
22
|
@r2=nil
|
14
23
|
@name=_("Multiple Regression: %s over %s") % [ ds.fields.join(",") , @y_var]
|
@@ -92,7 +101,7 @@ module Statsample
|
|
92
101
|
end
|
93
102
|
# Significance of Fisher
|
94
103
|
def significance
|
95
|
-
1.0-Distribution::F.cdf(f,df_r,df_e)
|
104
|
+
(1.0-Distribution::F.cdf(f, df_r, df_e)).abs
|
96
105
|
end
|
97
106
|
# Tolerance for a given variable
|
98
107
|
# http://talkstats.com/showthread.php?t=5056
|
@@ -120,6 +129,11 @@ module Statsample
|
|
120
129
|
}
|
121
130
|
out
|
122
131
|
end
|
132
|
+
# Standard error of R^2
|
133
|
+
def se_r2
|
134
|
+
Math::sqrt((4*r2*(1-r2)**2*(df_e)**2).quo((@cases**2-1)*(@cases+3)))
|
135
|
+
end
|
136
|
+
|
123
137
|
# Estimated Variance-Covariance Matrix
|
124
138
|
# Used for calculation of se of constant
|
125
139
|
def estimated_variance_covariance_matrix
|
@@ -152,8 +166,8 @@ module Statsample
|
|
152
166
|
c=coeffs
|
153
167
|
generator.add_text(_("Engine: %s") % self.class)
|
154
168
|
generator.add_text(_("Cases(listwise)=%d(%d)") % [@ds.cases, @ds_valid.cases])
|
155
|
-
generator.add_text("
|
156
|
-
generator.add_text("
|
169
|
+
generator.add_text("R=#{sprintf('%0.3f',r)}")
|
170
|
+
generator.add_text("R^2=#{sprintf('%0.3f',r2)}")
|
157
171
|
|
158
172
|
generator.add_text(_("Equation")+"="+ sprintf('%0.3f',constant) +" + "+ @fields.collect {|k| sprintf('%0.3f%s',c[k],k)}.join(' + ') )
|
159
173
|
|
@@ -167,7 +181,8 @@ module Statsample
|
|
167
181
|
cse=coeffs_se
|
168
182
|
t=ReportBuilder::Table.new(:name=>"Beta coefficients", :header=>%w{coeff b beta se t}.collect{|field| _(field)} )
|
169
183
|
|
170
|
-
|
184
|
+
t.add_row([_("Constant"), sprintf("%0.3f", constant), "-", sprintf("%0.3f", constant_se), sprintf("%0.3f", constant_t)])
|
185
|
+
|
171
186
|
@fields.each do |f|
|
172
187
|
t.add_row([f, sprintf("%0.3f", c[f]), sprintf("%0.3f", sc[f]), sprintf("%0.3f", cse[f]), sprintf("%0.3f", c[f].quo(cse[f]))])
|
173
188
|
end
|
@@ -184,22 +199,6 @@ module Statsample
|
|
184
199
|
a
|
185
200
|
end
|
186
201
|
|
187
|
-
# Deprecated
|
188
|
-
# Sum of squares of error (manual calculation)
|
189
|
-
# using the predicted value minus the y_i value
|
190
|
-
def sse_manual
|
191
|
-
pr=predicted
|
192
|
-
cases=0
|
193
|
-
sse=(0...@ds.cases).inject(0) {|a,i|
|
194
|
-
if !@dy.data_with_nils[i].nil? and !pr[i].nil?
|
195
|
-
cases+=1
|
196
|
-
a+((pr[i]-@dy[i])**2)
|
197
|
-
else
|
198
|
-
a
|
199
|
-
end
|
200
|
-
}
|
201
|
-
sse*(min_n_valid-1.0).quo(cases-1)
|
202
|
-
end
|
203
202
|
# Sum of squares of regression
|
204
203
|
# using the predicted value minus y mean
|
205
204
|
def ssr_direct
|
@@ -0,0 +1,187 @@
|
|
1
|
+
module Statsample
|
2
|
+
module Regression
|
3
|
+
module Multiple
|
4
|
+
# Pure Ruby Class for Multiple Regression Analysis, based on a covariance or correlation matrix.
|
5
|
+
# <b>Remember:</b> NEVER use a covariance matrix if you have missing data. Use only a correlation matrix in that case.
|
6
|
+
#
|
7
|
+
#
|
8
|
+
# Example:
|
9
|
+
#
|
10
|
+
# matrix=[[1.0, 0.5, 0.2], [0.5, 1.0, 0.7], [0.2, 0.7, 1.0]]
|
11
|
+
#
|
12
|
+
# lr=Statsample::Regression::Multiple::MatrixEngine.new(matrix,2)
|
13
|
+
|
14
|
+
class MatrixEngine < BaseEngine
|
15
|
+
# Hash of standard deviation of predictors.
|
16
|
+
# Only useful for Correlation Matrix, because by default is set to 1
|
17
|
+
attr_accessor :x_sd
|
18
|
+
# Standard deviation of criteria.
|
19
|
+
# Only useful for Correlation Matrix, because by default is set to 1
|
20
|
+
|
21
|
+
attr_accessor :y_sd
|
22
|
+
# Hash of mean for predictors. By default, set to 0
|
23
|
+
#
|
24
|
+
attr_accessor :x_mean
|
25
|
+
|
26
|
+
# Mean for criteria. By default, set to 0
|
27
|
+
#
|
28
|
+
attr_accessor :y_mean
|
29
|
+
|
30
|
+
# Number of cases
|
31
|
+
attr_writer :cases
|
32
|
+
|
33
|
+
# Create object
|
34
|
+
#
|
35
|
+
def initialize(matrix,y_var, opts=Hash.new)
|
36
|
+
matrix.extend Statsample::CovariateMatrix
|
37
|
+
raise "#{y_var} variable should be on data" unless matrix.fields.include? y_var
|
38
|
+
|
39
|
+
@matrix_cor=matrix.correlation
|
40
|
+
|
41
|
+
@y_var=y_var
|
42
|
+
@fields=matrix.fields-[y_var]
|
43
|
+
@n_predictors=@fields.size
|
44
|
+
@matrix=matrix
|
45
|
+
@matrix_x= matrix.submatrix(@fields)
|
46
|
+
@matrix_y = matrix.submatrix(@fields, [y_var])
|
47
|
+
@matrix_y_cor=@matrix_cor.submatrix(@fields, [y_var])
|
48
|
+
@result_matrix=@matrix_x.inverse * @matrix_y
|
49
|
+
@y_sd=Math::sqrt(@matrix.submatrix([y_var])[0,0])
|
50
|
+
@x_sd=@matrix_x.row_size.times.inject({}) {|ac,i|
|
51
|
+
ac[@matrix_x.fields[i]]=Math::sqrt(@matrix_x[i,i])
|
52
|
+
ac;
|
53
|
+
}
|
54
|
+
@cases=nil
|
55
|
+
@x_mean=@fields.inject({}) {|ac,f|
|
56
|
+
ac[f]=0.0
|
57
|
+
ac;
|
58
|
+
}
|
59
|
+
|
60
|
+
@y_mean=0.0
|
61
|
+
@name=_("Multiple reggresion of %s on %s") % [@fields.join(","), @y_var]
|
62
|
+
|
63
|
+
|
64
|
+
opts.each{|k,v|
|
65
|
+
self.send("#{k}=",v) if self.respond_to? k
|
66
|
+
}
|
67
|
+
if matrix.type==:covariance
|
68
|
+
@coeffs=@result_matrix.column(0).to_a
|
69
|
+
@coeffs_stan=coeffs.collect {|k,v|
|
70
|
+
coeffs[k]*@x_sd[k].quo(@y_sd)
|
71
|
+
}
|
72
|
+
else
|
73
|
+
@coeffs_stan=@result_matrix.column(0).to_a
|
74
|
+
|
75
|
+
@coeffs=standarized_coeffs.collect {|k,v|
|
76
|
+
standarized_coeffs[k]*@y_sd.quo(@x_sd[k])
|
77
|
+
}
|
78
|
+
end
|
79
|
+
|
80
|
+
end
|
81
|
+
def cases
|
82
|
+
raise "You should define the number of valid cases first" if @cases.nil?
|
83
|
+
@cases
|
84
|
+
end
|
85
|
+
# Get R^2 for the regression
|
86
|
+
# Equal to
|
87
|
+
# * 1-(|R| / |R_x|) or
|
88
|
+
# * Sum(b_i*r_yi)
|
89
|
+
def r2
|
90
|
+
@n_predictors.times.inject(0) {|ac,i| ac+@coeffs_stan[i]* @matrix_y_cor[i,0]}
|
91
|
+
#1-(@matrix.correlation.determinant.quo(@matrix_x.correlation.determinant))
|
92
|
+
end
|
93
|
+
def r
|
94
|
+
Math::sqrt(r2)
|
95
|
+
end
|
96
|
+
|
97
|
+
def constant
|
98
|
+
c=coeffs
|
99
|
+
@y_mean - @fields.inject(0){|a,k| a + (c[k] * @x_mean[k])}
|
100
|
+
end
|
101
|
+
def coeffs
|
102
|
+
assign_names(@coeffs)
|
103
|
+
end
|
104
|
+
def standarized_coeffs
|
105
|
+
assign_names(@coeffs_stan)
|
106
|
+
end
|
107
|
+
|
108
|
+
def sst
|
109
|
+
@y_sd**2*(cases-1.0)
|
110
|
+
end
|
111
|
+
|
112
|
+
# Degrees of freedom for regression
|
113
|
+
def df_r
|
114
|
+
@n_predictors
|
115
|
+
end
|
116
|
+
# Degrees of freedom for error
|
117
|
+
def df_e
|
118
|
+
cases-@n_predictors-1
|
119
|
+
end
|
120
|
+
|
121
|
+
# Tolerance for a given variable
|
122
|
+
# defined as (1-r2) of regression of other independent variables
|
123
|
+
# over the selected
|
124
|
+
# http://talkstats.com/showthread.php?t=5056
|
125
|
+
def tolerance(var)
|
126
|
+
lr=Statsample::Regression::Multiple::MatrixEngine.new(@matrix_x, var)
|
127
|
+
1-lr.r2
|
128
|
+
end
|
129
|
+
# Standard Error for coefficients.
|
130
|
+
# The standard error of a coefficient depends on
|
131
|
+
# * Tolerance of the coefficient: higher tolerance implies lower error
|
132
|
+
# * Higher r2 implies lower error
|
133
|
+
|
134
|
+
# Reference: Cohen et al. (2003). Applied Multiple Regression / Correlation Analysis for the Behavioral Sciences
|
135
|
+
#
|
136
|
+
def coeffs_se
|
137
|
+
out={}
|
138
|
+
mse=sse.quo(df_e)
|
139
|
+
coeffs.each {|k,v|
|
140
|
+
out[k]=@y_sd.quo(@x_sd[k])*Math::sqrt( 1.quo(tolerance(k)))*Math::sqrt((1-r2).quo(df_e))
|
141
|
+
}
|
142
|
+
out
|
143
|
+
end
|
144
|
+
# Standard error for constant
|
145
|
+
def constant_se
|
146
|
+
nil
|
147
|
+
end
|
148
|
+
|
149
|
+
def to_reportbuilder(generator)
|
150
|
+
anchor=generator.add_toc_entry(_("Multiple Regression: ")+@name)
|
151
|
+
generator.add_html "<div class='multiple-regression'>#{@name}<a name='#{anchor}'></a>"
|
152
|
+
c=coeffs
|
153
|
+
generator.add_text(_("Engine: %s") % self.class)
|
154
|
+
generator.add_text(_("Cases=%d") % [@cases])
|
155
|
+
generator.add_text("R=#{sprintf('%0.3f',r)}")
|
156
|
+
generator.add_text("R^2=#{sprintf('%0.3f',r2)}")
|
157
|
+
|
158
|
+
generator.add_text(_("Equation")+"="+ sprintf('%0.3f',constant) +" + "+ @fields.collect {|k| sprintf('%0.3f%s',c[k],k)}.join(' + ') )
|
159
|
+
|
160
|
+
t=ReportBuilder::Table.new(:name=>"ANOVA", :header=>%w{source ss df ms f s})
|
161
|
+
t.add_row([_("Regression"), sprintf("%0.3f",ssr), df_r, sprintf("%0.3f",msr), sprintf("%0.3f",f), sprintf("%0.3f", significance)])
|
162
|
+
t.add_row([_("Error"), sprintf("%0.3f",sse), df_e, sprintf("%0.3f",mse)])
|
163
|
+
|
164
|
+
t.add_row([_("Total"), sprintf("%0.3f",sst), df_r+df_e])
|
165
|
+
generator.parse_element(t)
|
166
|
+
sc=standarized_coeffs
|
167
|
+
cse=coeffs_se
|
168
|
+
t=ReportBuilder::Table.new(:name=>"Beta coefficients", :header=>%w{coeff b beta se t}.collect{|field| _(field)} )
|
169
|
+
|
170
|
+
if (constant_se.nil?)
|
171
|
+
t.add_row([_("Constant"), sprintf("%0.3f", constant),"--","?","?"])
|
172
|
+
else
|
173
|
+
t.add_row([_("Constant"), sprintf("%0.3f", constant), "-", sprintf("%0.3f", constant_se), sprintf("%0.3f", constant_t)])
|
174
|
+
end
|
175
|
+
|
176
|
+
@fields.each do |f|
|
177
|
+
t.add_row([f, sprintf("%0.3f", c[f]), sprintf("%0.3f", sc[f]), sprintf("%0.3f", cse[f]), sprintf("%0.3f", c[f].quo(cse[f]))])
|
178
|
+
end
|
179
|
+
generator.parse_element(t)
|
180
|
+
generator.add_html("</div>")
|
181
|
+
end
|
182
|
+
|
183
|
+
|
184
|
+
end
|
185
|
+
end
|
186
|
+
end
|
187
|
+
end
|
@@ -15,119 +15,79 @@ module Multiple
|
|
15
15
|
# ds={'a'=>@a,'b'=>@b,'c'=>@c,'y'=>@y}.to_dataset
|
16
16
|
# lr=Statsample::Regression::Multiple::RubyEngine.new(ds,'y')
|
17
17
|
|
18
|
-
class RubyEngine <
|
18
|
+
class RubyEngine < MatrixEngine
|
19
19
|
def initialize(ds,y_var, opts=Hash.new)
|
20
|
-
|
20
|
+
matrix=Statsample::Bivariate.correlation_matrix(ds)
|
21
|
+
fields_indep=ds.fields-[y_var]
|
22
|
+
default={
|
23
|
+
:y_mean=>ds[y_var].mean,
|
24
|
+
:x_mean=>fields_indep.inject({}) {|ac,f| ac[f]=ds[f].mean; ac},
|
25
|
+
:y_sd=>ds[y_var].sd,
|
26
|
+
:x_sd=>fields_indep.inject({}) {|ac,f| ac[f]=ds[f].sd; ac},
|
27
|
+
:cases=>Statsample::Bivariate.min_n_valid(ds)
|
28
|
+
}
|
29
|
+
opts=opts.merge(default)
|
30
|
+
super(matrix, y_var, opts)
|
31
|
+
@ds=ds
|
21
32
|
@dy=ds[@y_var]
|
22
33
|
@ds_valid=ds.dup_only_valid
|
23
|
-
@ds_indep=ds.dup(ds.fields-[y_var])
|
24
|
-
|
34
|
+
@ds_indep = ds.dup(ds.fields-[y_var])
|
35
|
+
|
36
|
+
# p obtain_predictor_matrix
|
37
|
+
# p @matrix_x.correlation
|
38
|
+
|
25
39
|
set_dep_columns
|
26
|
-
obtain_y_vector
|
27
|
-
@matrix_x = Bivariate.correlation_matrix(@ds_indep)
|
28
|
-
@coeffs_stan=(@matrix_x.inverse * @matrix_y).column(0).to_a
|
29
|
-
@min_n_valid=nil
|
30
|
-
end
|
31
|
-
def min_n_valid
|
32
|
-
if @min_n_valid.nil?
|
33
|
-
min=@ds.cases
|
34
|
-
m=Bivariate::n_valid_matrix(@ds)
|
35
|
-
for x in 0...m.row_size
|
36
|
-
for y in 0...m.column_size
|
37
|
-
min=m[x,y] if m[x,y] < min
|
38
|
-
end
|
39
|
-
end
|
40
|
-
@min_n_valid=min
|
41
|
-
end
|
42
|
-
@min_n_valid
|
43
40
|
end
|
41
|
+
|
44
42
|
def set_dep_columns
|
45
43
|
@dep_columns=[]
|
46
44
|
@ds_indep.each_vector{|k,v|
|
47
45
|
@dep_columns.push(v.data_with_nils)
|
48
46
|
}
|
49
47
|
end
|
50
|
-
# Sum of square total
|
51
|
-
def sst
|
52
|
-
#if @sst.nil?
|
53
|
-
@sst=@dy.variance*(min_n_valid-1.0)
|
54
|
-
#end
|
55
|
-
@sst
|
56
|
-
end
|
57
|
-
def r2
|
58
|
-
if @r2.nil?
|
59
|
-
c=@matrix_y
|
60
|
-
rxx=obtain_predictor_matrix
|
61
|
-
matrix=(c.t*rxx.inverse*c)
|
62
|
-
@r2=matrix[0,0]
|
63
|
-
end
|
64
|
-
@r2
|
65
|
-
end
|
66
|
-
def r
|
67
|
-
Math::sqrt(r2)
|
68
|
-
end
|
69
48
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
empty=[]
|
77
|
-
row.each do |k,v|
|
78
|
-
empty.push(k) if v.nil?
|
79
|
-
end
|
80
|
-
if empty.size==1
|
81
|
-
@ds_indep[empty[0]][i]=@ds[empty[0]].mean
|
82
|
-
end
|
83
|
-
i+=1
|
49
|
+
def fix_with_mean
|
50
|
+
i=0
|
51
|
+
@ds_indep.each do |row|
|
52
|
+
empty=[]
|
53
|
+
row.each do |k,v|
|
54
|
+
empty.push(k) if v.nil?
|
84
55
|
end
|
85
|
-
|
86
|
-
|
56
|
+
if empty.size==1
|
57
|
+
@ds_indep[empty[0]][i]=@ds[empty[0]].mean
|
58
|
+
end
|
59
|
+
i+=1
|
87
60
|
end
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
i+=1
|
61
|
+
@ds_indep.update_valid_data
|
62
|
+
set_dep_columns
|
63
|
+
end
|
64
|
+
def fix_with_regression
|
65
|
+
i=0
|
66
|
+
@ds_indep.each{|row|
|
67
|
+
empty=[]
|
68
|
+
row.each{|k,v|
|
69
|
+
empty.push(k) if v.nil?
|
70
|
+
}
|
71
|
+
if empty.size==1
|
72
|
+
field=empty[0]
|
73
|
+
lr=MultipleRegression.new(@ds_indep,field)
|
74
|
+
fields=[]
|
75
|
+
@ds_indep.fields.each{|f|
|
76
|
+
fields.push(row[f]) unless f==field
|
105
77
|
}
|
106
|
-
@ds_indep.
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
@dy.mean-@fields.inject(0){|a,k| a+(c[k] * @ds_indep[k].mean)}
|
120
|
-
end
|
121
|
-
|
122
|
-
def coeffs
|
123
|
-
sc=standarized_coeffs
|
124
|
-
assign_names(@fields.collect{|f|
|
125
|
-
(sc[f]*@dy.sds).quo(@ds_indep[f].sds)
|
126
|
-
})
|
127
|
-
end
|
128
|
-
def standarized_coeffs
|
129
|
-
assign_names(@coeffs_stan)
|
130
|
-
end
|
78
|
+
@ds_indep[field][i]=lr.process(fields)
|
79
|
+
end
|
80
|
+
i+=1
|
81
|
+
}
|
82
|
+
@ds_indep.update_valid_data
|
83
|
+
set_dep_columns
|
84
|
+
end
|
85
|
+
|
86
|
+
|
87
|
+
# Standard error for constant
|
88
|
+
def constant_se
|
89
|
+
estimated_variance_covariance_matrix[0,0]
|
90
|
+
end
|
131
91
|
end
|
132
92
|
end
|
133
93
|
end
|