statsample 0.4.1 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/Manifest.txt +3 -1
- data/lib/statsample.rb +175 -179
- data/lib/statsample/codification.rb +1 -1
- data/lib/statsample/converter/csv18.rb +56 -0
- data/lib/statsample/converter/csv19.rb +60 -0
- data/lib/statsample/converters.rb +26 -75
- data/lib/statsample/dataset.rb +38 -29
- data/lib/statsample/dominanceanalysis.rb +6 -6
- data/lib/statsample/graph/gdchart.rb +2 -1
- data/lib/statsample/graph/svggraph.rb +10 -9
- data/lib/statsample/multiset.rb +3 -3
- data/lib/statsample/regression/multiple.rb +43 -271
- data/lib/statsample/regression/multiple/baseengine.rb +235 -0
- data/lib/statsample/regression/multiple/gslengine.rb +2 -2
- data/lib/statsample/vector.rb +754 -736
- data/test/test_csv.rb +3 -4
- data/test/test_dataset.rb +22 -3
- data/test/test_distribution.rb +4 -3
- data/test/test_ggobi.rb +2 -2
- data/test/test_regression.rb +11 -2
- data/test/test_svg_graph.rb +0 -1
- data/test/test_vector.rb +50 -5
- data/test/test_xls.rb +2 -4
- metadata +5 -3
- data/test/_test_chart.rb +0 -58
@@ -1,284 +1,56 @@
|
|
1
|
+
require 'statsample/regression/multiple/baseengine'
|
1
2
|
module Statsample
|
2
|
-
module Regression
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
def self.listwise(ds,y_var)
|
22
|
-
if HAS_ALGIB
|
3
|
+
module Regression
|
4
|
+
# Module for Linear Multiple Regression Analysis
|
5
|
+
# You can call Regression::Multiple.listwise or Regression::Multiple.pairwise, or instantiate the engines directly.
|
6
|
+
# Example.
|
7
|
+
#
|
8
|
+
# require 'statsample'
|
9
|
+
# @a=[1,3,2,4,3,5,4,6,5,7].to_vector(:scale)
|
10
|
+
# @b=[3,3,4,4,5,5,6,6,4,4].to_vector(:scale)
|
11
|
+
# @c=[11,22,30,40,50,65,78,79,99,100].to_vector(:scale)
|
12
|
+
# @y=[3,4,5,6,7,8,9,10,20,30].to_vector(:scale)
|
13
|
+
# ds={'a'=>@a,'b'=>@b,'c'=>@c,'y'=>@y}.to_dataset
|
14
|
+
# lr=Statsample::Regression::Multiple.listwise(ds,'y')
|
15
|
+
# #<Statsample::Regression::Multiple::AlglibEngine:0x7f21912e4758 @ds_valid=#<Statsample::Dataset:69891073182680 @fields=[a,b,c,y] labels={"a"=>nil, "b"=>nil, "y"=>nil, "c"=>nil} cases=10, @lr=#<Alglib::LinearRegression:0x7f21912df118 @model=#<Alglib_ext::LinearModel:0x7f21912df708>, @ivars=3, @cases=10, @report=#<Alglib_ext::LrReport:0x7f21912df168>>, @y_var="y", @ds=#<Statsample::Dataset:69891073182680 @fields=[a,b,c,y] labels={"a"=>nil, "b"=>nil, "y"=>nil, "c"=>nil} cases=10, @fields=["a", "b", "c"], @lr_s=nil, @dep_columns=[[1, 3, 2, 4, 3, 5, 4, 6, 5, 7], [3, 3, 4, 4, 5, 5, 6, 6, 4, 4], [11, 22, 30, 40, 50, 65, 78, 79, 99, 100]], @ds_indep=#<Statsample::Dataset:69891073180060 @fields=[a,b,c] labels={"a"=>nil, "b"=>nil, "c"=>nil} cases=10, @dy=Vector(type:scale, n:10)[3,4,5,6,7,8,9,10,20,30]>
|
16
|
+
module Multiple
|
17
|
+
# Creates an object for listwise regression.
|
18
|
+
# Alglib is faster, so it is preferred over GSL
|
19
|
+
# lr=Statsample::Regression::Multiple.listwise(ds,'y')
|
20
|
+
def self.listwise(ds,y_var)
|
21
|
+
if HAS_ALGIB
|
23
22
|
AlglibEngine.new(ds,y_var)
|
24
|
-
|
23
|
+
elsif HAS_GSL
|
25
24
|
GslEngine.new(ds,y_var)
|
26
|
-
|
25
|
+
else
|
27
26
|
ds2=ds.dup_only_valid
|
28
27
|
RubyEngine.new(ds2,y_var)
|
28
|
+
end
|
29
29
|
end
|
30
|
-
end
|
31
|
-
|
32
|
-
# Creates an object for pairwise regression
|
33
|
-
# For now, always retrieves a RubyEngine
|
34
|
-
# lr=Statsample::Regression::Multiple.listwise(ds,'y')
|
35
|
-
def self.pairwise(ds,y_var)
|
36
|
-
RubyEngine.new(ds,y_var)
|
37
|
-
end
|
38
|
-
def self.listwise_by_exp(ds,exp)
|
39
|
-
end
|
40
|
-
# Returns a dataset and name of criteria using a expression.
|
41
|
-
# All nominal vectors are replaced by dummy coding
|
42
|
-
# and interactions are calculated
|
43
|
-
|
44
|
-
def self.ds_by_exp(ds,exp)
|
45
|
-
raise "Not implemented"
|
46
|
-
parts=exp.split(/[\+=]/)
|
47
|
-
dependent=parts.pop
|
48
|
-
ds_out=[]
|
49
|
-
parts.each{|p|
|
50
|
-
|
51
|
-
}
|
52
|
-
end
|
53
|
-
# Base class for Multiple Regression Engines
|
54
|
-
class BaseEngine
|
55
|
-
def initialize(ds,y_var)
|
56
|
-
@ds=ds
|
57
|
-
@y_var=y_var
|
58
|
-
@r2=nil
|
59
|
-
end
|
60
|
-
|
61
|
-
# Retrieves a vector with predicted values for y
|
62
|
-
def predicted
|
63
|
-
(0...@ds.cases).collect { |i|
|
64
|
-
invalid=false
|
65
|
-
vect=@dep_columns.collect {|v| invalid=true if v[i].nil?; v[i]}
|
66
|
-
if invalid
|
67
|
-
nil
|
68
|
-
else
|
69
|
-
process(vect)
|
70
|
-
end
|
71
|
-
}.to_vector(:scale)
|
72
|
-
end
|
73
|
-
# Retrieves a vector with standarized values for y
|
74
|
-
def standarized_predicted
|
75
|
-
predicted.standarized
|
76
|
-
end
|
77
|
-
# Retrieves a vector with residuals values for y
|
78
|
-
def residuals
|
79
|
-
(0...@ds.cases).collect{|i|
|
80
|
-
invalid=false
|
81
|
-
vect=@dep_columns.collect{|v| invalid=true if v[i].nil?; v[i]}
|
82
|
-
if invalid or @ds[@y_var][i].nil?
|
83
|
-
nil
|
84
|
-
else
|
85
|
-
@ds[@y_var][i] - process(vect)
|
86
|
-
end
|
87
|
-
}.to_vector(:scale)
|
88
|
-
end
|
89
|
-
# R Multiple
|
90
|
-
def r
|
91
|
-
raise "You should implement this"
|
92
|
-
end
|
93
|
-
# Sum of squares Total
|
94
|
-
def sst
|
95
|
-
raise "You should implement this"
|
96
|
-
end
|
97
|
-
# Sum of squares (regression)
|
98
|
-
def ssr
|
99
|
-
r2*sst
|
100
|
-
end
|
101
|
-
# Sum of squares (Error)
|
102
|
-
def sse
|
103
|
-
sst - ssr
|
104
|
-
end
|
105
|
-
# T values for coeffs
|
106
|
-
def coeffs_t
|
107
|
-
out={}
|
108
|
-
se=coeffs_se
|
109
|
-
coeffs.each{|k,v|
|
110
|
-
out[k]=v / se[k]
|
111
|
-
}
|
112
|
-
out
|
113
|
-
end
|
114
|
-
# Mean square Regression
|
115
|
-
def msr
|
116
|
-
ssr.quo(df_r)
|
117
|
-
end
|
118
|
-
# Mean Square Error
|
119
|
-
def mse
|
120
|
-
sse.quo(df_e)
|
121
|
-
end
|
122
|
-
# Degrees of freedom for regression
|
123
|
-
def df_r
|
124
|
-
@dep_columns.size
|
125
|
-
end
|
126
|
-
# Degrees of freedom for error
|
127
|
-
def df_e
|
128
|
-
@ds_valid.cases-@dep_columns.size-1
|
129
|
-
end
|
130
|
-
# Fisher for Anova
|
131
|
-
def f
|
132
|
-
(ssr.quo(df_r)).quo(sse.quo(df_e))
|
133
|
-
end
|
134
|
-
# Significance of Fisher
|
135
|
-
def significance
|
136
|
-
1.0-Distribution::F.cdf(f,df_r,df_e)
|
137
|
-
end
|
138
|
-
# Tolerance for a given variable
|
139
|
-
# http://talkstats.com/showthread.php?t=5056
|
140
|
-
def tolerance(var)
|
141
|
-
ds=assign_names(@dep_columns)
|
142
|
-
ds.each{|k,v|
|
143
|
-
ds[k]=v.to_vector(:scale)
|
144
|
-
}
|
145
|
-
lr=Multiple.listwise(ds.to_dataset,var)
|
146
|
-
1-lr.r2
|
147
|
-
end
|
148
|
-
# Tolerances for each coefficient
|
149
|
-
def coeffs_tolerances
|
150
|
-
@fields.inject({}) {|a,f|
|
151
|
-
a[f]=tolerance(f);
|
152
|
-
a
|
153
|
-
}
|
154
|
-
end
|
155
|
-
# Standard Error for coefficients
|
156
|
-
def coeffs_se
|
157
|
-
out={}
|
158
|
-
mse=sse.quo(df_e)
|
159
|
-
coeffs.each {|k,v|
|
160
|
-
out[k]=Math::sqrt(mse/(@ds[k].sum_of_squares*tolerance(k)))
|
161
|
-
}
|
162
|
-
out
|
163
|
-
end
|
164
|
-
# Estimated Variance-Covariance Matrix
|
165
|
-
# Used for calculation of se of constant
|
166
|
-
def estimated_variance_covariance_matrix
|
167
|
-
mse_p=mse
|
168
|
-
columns=[]
|
169
|
-
@ds_valid.each_vector{|k,v|
|
170
|
-
columns.push(v.data) unless k==@y_var
|
171
|
-
}
|
172
|
-
columns.unshift([1.0]*@ds_valid.cases)
|
173
|
-
x=Matrix.columns(columns)
|
174
|
-
matrix=((x.t*x)).inverse * mse
|
175
|
-
matrix.collect {|i|
|
176
|
-
Math::sqrt(i) if i>0
|
177
|
-
}
|
178
|
-
end
|
179
|
-
# T for constant
|
180
|
-
def constant_t
|
181
|
-
constant.to_f/constant_se
|
182
|
-
end
|
183
|
-
# Standard error for constant
|
184
|
-
def constant_se
|
185
|
-
estimated_variance_covariance_matrix[0,0]
|
186
|
-
end
|
187
|
-
# Retrieves a summary for Regression
|
188
|
-
def summary(report_type=ConsoleSummary)
|
189
|
-
c=coeffs
|
190
|
-
out=""
|
191
|
-
out.extend report_type
|
192
|
-
out.add <<HEREDOC
|
193
|
-
Summary for regression of #{@fields.join(',')} over #{@y_var}
|
194
|
-
*************************************************************
|
195
|
-
Engine: #{self.class}
|
196
|
-
Cases(listwise)=#{@ds.cases}(#{@ds_valid.cases})
|
197
|
-
r=#{sprintf("%0.3f",r)}
|
198
|
-
r2=#{sprintf("%0.3f",r2)}
|
199
|
-
Equation=#{sprintf("%0.3f",constant)}+#{@fields.collect {|k| sprintf("%0.3f%s",c[k],k)}.join(' + ')}
|
200
|
-
HEREDOC
|
201
|
-
|
202
|
-
out.add_line
|
203
|
-
out.add "ANOVA TABLE"
|
204
|
-
|
205
|
-
t=Statsample::ReportTable.new(%w{source ss df ms f s})
|
206
|
-
t.add_row(["Regression", sprintf("%0.3f",ssr), df_r, sprintf("%0.3f",msr), sprintf("%0.3f",f), sprintf("%0.3f", significance)])
|
207
|
-
t.add_row(["Error", sprintf("%0.3f",sse), df_e, sprintf("%0.3f",mse)])
|
208
|
-
|
209
|
-
t.add_row(["Total", sprintf("%0.3f",sst), df_r+df_e])
|
210
30
|
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
cse=coeffs_se
|
217
|
-
t=Statsample::ReportTable.new(%w{coeff b beta se t})
|
218
|
-
t.add_row(["Constant", sprintf("%0.3f", constant), "-", sprintf("%0.3f", constant_se), sprintf("%0.3f", constant_t)])
|
219
|
-
@fields.each{|f|
|
220
|
-
t.add_row([f, sprintf("%0.3f", c[f]), sprintf("%0.3f", sc[f]), sprintf("%0.3f", cse[f]), sprintf("%0.3f", c[f].quo(cse[f]))])
|
221
|
-
}
|
222
|
-
out.parse_table(t)
|
223
|
-
|
224
|
-
rescue
|
31
|
+
# Creates an object for pairwise regression
|
32
|
+
# For now, always retrieves a RubyEngine
|
33
|
+
# lr=Statsample::Regression::Multiple.pairwise(ds,'y')
|
34
|
+
def self.pairwise(ds,y_var)
|
35
|
+
RubyEngine.new(ds,y_var)
|
225
36
|
end
|
226
|
-
|
37
|
+
def self.listwise_by_exp(ds,exp)
|
38
|
+
raise "Not implemented yet"
|
227
39
|
end
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
40
|
+
# Returns a dataset and name of criteria using an expression.
|
41
|
+
# All nominal vectors are replaced by dummy coding
|
42
|
+
# and interactions are calculated
|
43
|
+
|
44
|
+
def self.ds_by_exp(ds,exp)
|
45
|
+
raise "Not implemented"
|
46
|
+
parts=exp.split(/[\+=]/)
|
47
|
+
dependent=parts.pop
|
48
|
+
ds_out=[]
|
49
|
+
parts.each{|p|
|
50
|
+
|
51
|
+
}
|
234
52
|
end
|
235
|
-
|
236
53
|
|
237
|
-
# Deprecated
|
238
|
-
# Sum of squares of error (manual calculation)
|
239
|
-
# using the predicted value minus the y_i value
|
240
|
-
def sse_manual
|
241
|
-
pr=predicted
|
242
|
-
cases=0
|
243
|
-
sse=(0...@ds.cases).inject(0) {|a,i|
|
244
|
-
if !@dy.data_with_nils[i].nil? and !pr[i].nil?
|
245
|
-
cases+=1
|
246
|
-
a+((pr[i]-@dy[i])**2)
|
247
|
-
else
|
248
|
-
a
|
249
|
-
end
|
250
|
-
}
|
251
|
-
sse*(min_n_valid-1.0).quo(cases-1)
|
252
|
-
end
|
253
|
-
# Sum of squares of regression
|
254
|
-
# using the predicted value minus y mean
|
255
|
-
def ssr_direct
|
256
|
-
mean=@dy.mean
|
257
|
-
cases=0
|
258
|
-
ssr=(0...@ds.cases).inject(0) {|a,i|
|
259
|
-
invalid=false
|
260
|
-
v=@dep_columns.collect{|c| invalid=true if c[i].nil?; c[i]}
|
261
|
-
if !invalid
|
262
|
-
cases+=1
|
263
|
-
a+((process(v)-mean)**2)
|
264
|
-
else
|
265
|
-
a
|
266
|
-
end
|
267
|
-
}
|
268
|
-
ssr
|
269
|
-
end
|
270
|
-
def sse_direct
|
271
|
-
sst-ssr
|
272
54
|
end
|
273
|
-
|
274
|
-
c=coeffs
|
275
|
-
total=constant
|
276
|
-
@fields.each_index{|i|
|
277
|
-
total+=c[@fields[i]]*v[i]
|
278
|
-
}
|
279
|
-
total
|
280
|
-
end
|
281
|
-
end
|
282
|
-
end
|
283
|
-
end
|
55
|
+
end
|
284
56
|
end
|
@@ -0,0 +1,235 @@
|
|
1
|
+
module Statsample
|
2
|
+
module Regression
|
3
|
+
module Multiple
|
4
|
+
# Base class for Multiple Regression Engines
|
5
|
+
class BaseEngine
|
6
|
+
def initialize(ds,y_var)
|
7
|
+
@ds=ds
|
8
|
+
@y_var=y_var
|
9
|
+
@r2=nil
|
10
|
+
end
|
11
|
+
|
12
|
+
# Retrieves a vector with predicted values for y
|
13
|
+
def predicted
|
14
|
+
(0...@ds.cases).collect { |i|
|
15
|
+
invalid=false
|
16
|
+
vect=@dep_columns.collect {|v| invalid=true if v[i].nil?; v[i]}
|
17
|
+
if invalid
|
18
|
+
nil
|
19
|
+
else
|
20
|
+
process(vect)
|
21
|
+
end
|
22
|
+
}.to_vector(:scale)
|
23
|
+
end
|
24
|
+
# Retrieves a vector with standardized predicted values for y
|
25
|
+
def standarized_predicted
|
26
|
+
predicted.standarized
|
27
|
+
end
|
28
|
+
# Retrieves a vector with residual values for y
|
29
|
+
def residuals
|
30
|
+
(0...@ds.cases).collect{|i|
|
31
|
+
invalid=false
|
32
|
+
vect=@dep_columns.collect{|v| invalid=true if v[i].nil?; v[i]}
|
33
|
+
if invalid or @ds[@y_var][i].nil?
|
34
|
+
nil
|
35
|
+
else
|
36
|
+
@ds[@y_var][i] - process(vect)
|
37
|
+
end
|
38
|
+
}.to_vector(:scale)
|
39
|
+
end
|
40
|
+
# R Multiple
|
41
|
+
def r
|
42
|
+
raise "You should implement this"
|
43
|
+
end
|
44
|
+
# Sum of squares Total
|
45
|
+
def sst
|
46
|
+
raise "You should implement this"
|
47
|
+
end
|
48
|
+
# Sum of squares (regression)
|
49
|
+
def ssr
|
50
|
+
r2*sst
|
51
|
+
end
|
52
|
+
# Sum of squares (Error)
|
53
|
+
def sse
|
54
|
+
sst - ssr
|
55
|
+
end
|
56
|
+
# T values for coeffs
|
57
|
+
def coeffs_t
|
58
|
+
out={}
|
59
|
+
se=coeffs_se
|
60
|
+
coeffs.each{|k,v|
|
61
|
+
out[k]=v / se[k]
|
62
|
+
}
|
63
|
+
out
|
64
|
+
end
|
65
|
+
# Mean square Regression
|
66
|
+
def msr
|
67
|
+
ssr.quo(df_r)
|
68
|
+
end
|
69
|
+
# Mean Square Error
|
70
|
+
def mse
|
71
|
+
sse.quo(df_e)
|
72
|
+
end
|
73
|
+
# Degrees of freedom for regression
|
74
|
+
def df_r
|
75
|
+
@dep_columns.size
|
76
|
+
end
|
77
|
+
# Degrees of freedom for error
|
78
|
+
def df_e
|
79
|
+
@ds_valid.cases-@dep_columns.size-1
|
80
|
+
end
|
81
|
+
# Fisher for Anova
|
82
|
+
def f
|
83
|
+
(ssr.quo(df_r)).quo(sse.quo(df_e))
|
84
|
+
end
|
85
|
+
# Significance of Fisher
|
86
|
+
def significance
|
87
|
+
1.0-Distribution::F.cdf(f,df_r,df_e)
|
88
|
+
end
|
89
|
+
# Tolerance for a given variable
|
90
|
+
# http://talkstats.com/showthread.php?t=5056
|
91
|
+
def tolerance(var)
|
92
|
+
ds=assign_names(@dep_columns)
|
93
|
+
ds.each{|k,v|
|
94
|
+
ds[k]=v.to_vector(:scale)
|
95
|
+
}
|
96
|
+
lr=Multiple.listwise(ds.to_dataset,var)
|
97
|
+
1-lr.r2
|
98
|
+
end
|
99
|
+
# Tolerances for each coefficient
|
100
|
+
def coeffs_tolerances
|
101
|
+
@fields.inject({}) {|a,f|
|
102
|
+
a[f]=tolerance(f);
|
103
|
+
a
|
104
|
+
}
|
105
|
+
end
|
106
|
+
# Standard Error for coefficients
|
107
|
+
def coeffs_se
|
108
|
+
out={}
|
109
|
+
mse=sse.quo(df_e)
|
110
|
+
coeffs.each {|k,v|
|
111
|
+
out[k]=Math::sqrt(mse/(@ds[k].sum_of_squares*tolerance(k)))
|
112
|
+
}
|
113
|
+
out
|
114
|
+
end
|
115
|
+
# Estimated Variance-Covariance Matrix
|
116
|
+
# Used for calculation of se of constant
|
117
|
+
def estimated_variance_covariance_matrix
|
118
|
+
mse_p=mse
|
119
|
+
columns=[]
|
120
|
+
@ds_valid.each_vector{|k,v|
|
121
|
+
columns.push(v.data) unless k==@y_var
|
122
|
+
}
|
123
|
+
columns.unshift([1.0]*@ds_valid.cases)
|
124
|
+
x=Matrix.columns(columns)
|
125
|
+
matrix=((x.t*x)).inverse * mse
|
126
|
+
matrix.collect {|i|
|
127
|
+
Math::sqrt(i) if i>0
|
128
|
+
}
|
129
|
+
end
|
130
|
+
# T for constant
|
131
|
+
def constant_t
|
132
|
+
constant.to_f/constant_se
|
133
|
+
end
|
134
|
+
# Standard error for constant
|
135
|
+
def constant_se
|
136
|
+
estimated_variance_covariance_matrix[0,0]
|
137
|
+
end
|
138
|
+
# Retrieves a summary for Regression
|
139
|
+
def summary(report_type=ConsoleSummary)
|
140
|
+
c=coeffs
|
141
|
+
out=""
|
142
|
+
out.extend report_type
|
143
|
+
out.add <<HEREDOC
|
144
|
+
Summary for regression of #{@fields.join(',')} over #{@y_var}
|
145
|
+
*************************************************************
|
146
|
+
Engine: #{self.class}
|
147
|
+
Cases(listwise)=#{@ds.cases}(#{@ds_valid.cases})
|
148
|
+
r=#{sprintf("%0.3f",r)}
|
149
|
+
r2=#{sprintf("%0.3f",r2)}
|
150
|
+
Equation=#{sprintf("%0.3f",constant)}+#{@fields.collect {|k| sprintf("%0.3f%s",c[k],k)}.join(' + ')}
|
151
|
+
HEREDOC
|
152
|
+
|
153
|
+
out.add_line
|
154
|
+
out.add "ANOVA TABLE"
|
155
|
+
|
156
|
+
t=Statsample::ReportTable.new(%w{source ss df ms f s})
|
157
|
+
t.add_row(["Regression", sprintf("%0.3f",ssr), df_r, sprintf("%0.3f",msr), sprintf("%0.3f",f), sprintf("%0.3f", significance)])
|
158
|
+
t.add_row(["Error", sprintf("%0.3f",sse), df_e, sprintf("%0.3f",mse)])
|
159
|
+
|
160
|
+
t.add_row(["Total", sprintf("%0.3f",sst), df_r+df_e])
|
161
|
+
|
162
|
+
out.parse_table(t)
|
163
|
+
|
164
|
+
begin
|
165
|
+
out.add "Beta coefficientes"
|
166
|
+
sc=standarized_coeffs
|
167
|
+
cse=coeffs_se
|
168
|
+
t=Statsample::ReportTable.new(%w{coeff b beta se t})
|
169
|
+
t.add_row(["Constant", sprintf("%0.3f", constant), "-", sprintf("%0.3f", constant_se), sprintf("%0.3f", constant_t)])
|
170
|
+
@fields.each{|f|
|
171
|
+
t.add_row([f, sprintf("%0.3f", c[f]), sprintf("%0.3f", sc[f]), sprintf("%0.3f", cse[f]), sprintf("%0.3f", c[f].quo(cse[f]))])
|
172
|
+
}
|
173
|
+
out.parse_table(t)
|
174
|
+
|
175
|
+
rescue
|
176
|
+
end
|
177
|
+
out
|
178
|
+
end
|
179
|
+
def assign_names(c)
|
180
|
+
a={}
|
181
|
+
@fields.each_index {|i|
|
182
|
+
a[@fields[i]]=c[i]
|
183
|
+
}
|
184
|
+
a
|
185
|
+
end
|
186
|
+
|
187
|
+
|
188
|
+
# Deprecated
|
189
|
+
# Sum of squares of error (manual calculation)
|
190
|
+
# using the predicted value minus the y_i value
|
191
|
+
def sse_manual
|
192
|
+
pr=predicted
|
193
|
+
cases=0
|
194
|
+
sse=(0...@ds.cases).inject(0) {|a,i|
|
195
|
+
if !@dy.data_with_nils[i].nil? and !pr[i].nil?
|
196
|
+
cases+=1
|
197
|
+
a+((pr[i]-@dy[i])**2)
|
198
|
+
else
|
199
|
+
a
|
200
|
+
end
|
201
|
+
}
|
202
|
+
sse*(min_n_valid-1.0).quo(cases-1)
|
203
|
+
end
|
204
|
+
# Sum of squares of regression
|
205
|
+
# using the predicted value minus y mean
|
206
|
+
def ssr_direct
|
207
|
+
mean=@dy.mean
|
208
|
+
cases=0
|
209
|
+
ssr=(0...@ds.cases).inject(0) {|a,i|
|
210
|
+
invalid=false
|
211
|
+
v=@dep_columns.collect{|c| invalid=true if c[i].nil?; c[i]}
|
212
|
+
if !invalid
|
213
|
+
cases+=1
|
214
|
+
a+((process(v)-mean)**2)
|
215
|
+
else
|
216
|
+
a
|
217
|
+
end
|
218
|
+
}
|
219
|
+
ssr
|
220
|
+
end
|
221
|
+
def sse_direct
|
222
|
+
sst-ssr
|
223
|
+
end
|
224
|
+
def process(v)
|
225
|
+
c=coeffs
|
226
|
+
total=constant
|
227
|
+
@fields.each_index{|i|
|
228
|
+
total+=c[@fields[i]]*v[i]
|
229
|
+
}
|
230
|
+
total
|
231
|
+
end
|
232
|
+
end
|
233
|
+
end
|
234
|
+
end
|
235
|
+
end
|