statsample 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/Manifest.txt +3 -1
- data/lib/statsample.rb +175 -179
- data/lib/statsample/codification.rb +1 -1
- data/lib/statsample/converter/csv18.rb +56 -0
- data/lib/statsample/converter/csv19.rb +60 -0
- data/lib/statsample/converters.rb +26 -75
- data/lib/statsample/dataset.rb +38 -29
- data/lib/statsample/dominanceanalysis.rb +6 -6
- data/lib/statsample/graph/gdchart.rb +2 -1
- data/lib/statsample/graph/svggraph.rb +10 -9
- data/lib/statsample/multiset.rb +3 -3
- data/lib/statsample/regression/multiple.rb +43 -271
- data/lib/statsample/regression/multiple/baseengine.rb +235 -0
- data/lib/statsample/regression/multiple/gslengine.rb +2 -2
- data/lib/statsample/vector.rb +754 -736
- data/test/test_csv.rb +3 -4
- data/test/test_dataset.rb +22 -3
- data/test/test_distribution.rb +4 -3
- data/test/test_ggobi.rb +2 -2
- data/test/test_regression.rb +11 -2
- data/test/test_svg_graph.rb +0 -1
- data/test/test_vector.rb +50 -5
- data/test/test_xls.rb +2 -4
- metadata +5 -3
- data/test/_test_chart.rb +0 -58
@@ -1,284 +1,56 @@
|
|
1
|
+
require 'statsample/regression/multiple/baseengine'
|
1
2
|
module Statsample
|
2
|
-
module Regression
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
def self.listwise(ds,y_var)
|
22
|
-
if HAS_ALGIB
|
3
|
+
module Regression
|
4
|
+
# Module for Linear Multiple Regression Analysis
|
5
|
+
# You can call Regression::Multiple.listwise or Regression::Multiple.pairwise, or instantiate the engines directly
|
6
|
+
# Example.
|
7
|
+
#
|
8
|
+
# require 'statsample'
|
9
|
+
# @a=[1,3,2,4,3,5,4,6,5,7].to_vector(:scale)
|
10
|
+
# @b=[3,3,4,4,5,5,6,6,4,4].to_vector(:scale)
|
11
|
+
# @c=[11,22,30,40,50,65,78,79,99,100].to_vector(:scale)
|
12
|
+
# @y=[3,4,5,6,7,8,9,10,20,30].to_vector(:scale)
|
13
|
+
# ds={'a'=>@a,'b'=>@b,'c'=>@c,'y'=>@y}.to_dataset
|
14
|
+
# lr=Statsample::Regression::Multiple.listwise(ds,'y')
|
15
|
+
# #<Statsample::Regression::Multiple::AlglibEngine:0x7f21912e4758 @ds_valid=#<Statsample::Dataset:69891073182680 @fields=[a,b,c,y] labels={"a"=>nil, "b"=>nil, "y"=>nil, "c"=>nil} cases=10, @lr=#<Alglib::LinearRegression:0x7f21912df118 @model=#<Alglib_ext::LinearModel:0x7f21912df708>, @ivars=3, @cases=10, @report=#<Alglib_ext::LrReport:0x7f21912df168>>, @y_var="y", @ds=#<Statsample::Dataset:69891073182680 @fields=[a,b,c,y] labels={"a"=>nil, "b"=>nil, "y"=>nil, "c"=>nil} cases=10, @fields=["a", "b", "c"], @lr_s=nil, @dep_columns=[[1, 3, 2, 4, 3, 5, 4, 6, 5, 7], [3, 3, 4, 4, 5, 5, 6, 6, 4, 4], [11, 22, 30, 40, 50, 65, 78, 79, 99, 100]], @ds_indep=#<Statsample::Dataset:69891073180060 @fields=[a,b,c] labels={"a"=>nil, "b"=>nil, "c"=>nil} cases=10, @dy=Vector(type:scale, n:10)[3,4,5,6,7,8,9,10,20,30]>
|
16
|
+
module Multiple
|
17
|
+
# Creates an object for listwise regression.
|
18
|
+
# Alglib is faster, so it is preferred over GSL
|
19
|
+
# lr=Statsample::Regression::Multiple.listwise(ds,'y')
|
20
|
+
def self.listwise(ds,y_var)
|
21
|
+
if HAS_ALGIB
|
23
22
|
AlglibEngine.new(ds,y_var)
|
24
|
-
|
23
|
+
elsif HAS_GSL
|
25
24
|
GslEngine.new(ds,y_var)
|
26
|
-
|
25
|
+
else
|
27
26
|
ds2=ds.dup_only_valid
|
28
27
|
RubyEngine.new(ds2,y_var)
|
28
|
+
end
|
29
29
|
end
|
30
|
-
end
|
31
|
-
|
32
|
-
# Creates an object for pairwise regression
|
33
|
-
# For now, always retrieves a RubyEngine
|
34
|
-
# lr=Statsample::Regression::Multiple.listwise(ds,'y')
|
35
|
-
def self.pairwise(ds,y_var)
|
36
|
-
RubyEngine.new(ds,y_var)
|
37
|
-
end
|
38
|
-
def self.listwise_by_exp(ds,exp)
|
39
|
-
end
|
40
|
-
# Returns a dataset and name of criteria using a expression.
|
41
|
-
# All nominal vectors are replaced by dummy coding
|
42
|
-
# and interactions are calculated
|
43
|
-
|
44
|
-
def self.ds_by_exp(ds,exp)
|
45
|
-
raise "Not implemented"
|
46
|
-
parts=exp.split(/[\+=]/)
|
47
|
-
dependent=parts.pop
|
48
|
-
ds_out=[]
|
49
|
-
parts.each{|p|
|
50
|
-
|
51
|
-
}
|
52
|
-
end
|
53
|
-
# Base class for Multiple Regression Engines
|
54
|
-
class BaseEngine
|
55
|
-
def initialize(ds,y_var)
|
56
|
-
@ds=ds
|
57
|
-
@y_var=y_var
|
58
|
-
@r2=nil
|
59
|
-
end
|
60
|
-
|
61
|
-
# Retrieves a vector with predicted values for y
|
62
|
-
def predicted
|
63
|
-
(0...@ds.cases).collect { |i|
|
64
|
-
invalid=false
|
65
|
-
vect=@dep_columns.collect {|v| invalid=true if v[i].nil?; v[i]}
|
66
|
-
if invalid
|
67
|
-
nil
|
68
|
-
else
|
69
|
-
process(vect)
|
70
|
-
end
|
71
|
-
}.to_vector(:scale)
|
72
|
-
end
|
73
|
-
# Retrieves a vector with standarized values for y
|
74
|
-
def standarized_predicted
|
75
|
-
predicted.standarized
|
76
|
-
end
|
77
|
-
# Retrieves a vector with residuals values for y
|
78
|
-
def residuals
|
79
|
-
(0...@ds.cases).collect{|i|
|
80
|
-
invalid=false
|
81
|
-
vect=@dep_columns.collect{|v| invalid=true if v[i].nil?; v[i]}
|
82
|
-
if invalid or @ds[@y_var][i].nil?
|
83
|
-
nil
|
84
|
-
else
|
85
|
-
@ds[@y_var][i] - process(vect)
|
86
|
-
end
|
87
|
-
}.to_vector(:scale)
|
88
|
-
end
|
89
|
-
# R Multiple
|
90
|
-
def r
|
91
|
-
raise "You should implement this"
|
92
|
-
end
|
93
|
-
# Sum of squares Total
|
94
|
-
def sst
|
95
|
-
raise "You should implement this"
|
96
|
-
end
|
97
|
-
# Sum of squares (regression)
|
98
|
-
def ssr
|
99
|
-
r2*sst
|
100
|
-
end
|
101
|
-
# Sum of squares (Error)
|
102
|
-
def sse
|
103
|
-
sst - ssr
|
104
|
-
end
|
105
|
-
# T values for coeffs
|
106
|
-
def coeffs_t
|
107
|
-
out={}
|
108
|
-
se=coeffs_se
|
109
|
-
coeffs.each{|k,v|
|
110
|
-
out[k]=v / se[k]
|
111
|
-
}
|
112
|
-
out
|
113
|
-
end
|
114
|
-
# Mean square Regression
|
115
|
-
def msr
|
116
|
-
ssr.quo(df_r)
|
117
|
-
end
|
118
|
-
# Mean Square Error
|
119
|
-
def mse
|
120
|
-
sse.quo(df_e)
|
121
|
-
end
|
122
|
-
# Degrees of freedom for regression
|
123
|
-
def df_r
|
124
|
-
@dep_columns.size
|
125
|
-
end
|
126
|
-
# Degrees of freedom for error
|
127
|
-
def df_e
|
128
|
-
@ds_valid.cases-@dep_columns.size-1
|
129
|
-
end
|
130
|
-
# Fisher for Anova
|
131
|
-
def f
|
132
|
-
(ssr.quo(df_r)).quo(sse.quo(df_e))
|
133
|
-
end
|
134
|
-
# Significance of Fisher
|
135
|
-
def significance
|
136
|
-
1.0-Distribution::F.cdf(f,df_r,df_e)
|
137
|
-
end
|
138
|
-
# Tolerance for a given variable
|
139
|
-
# http://talkstats.com/showthread.php?t=5056
|
140
|
-
def tolerance(var)
|
141
|
-
ds=assign_names(@dep_columns)
|
142
|
-
ds.each{|k,v|
|
143
|
-
ds[k]=v.to_vector(:scale)
|
144
|
-
}
|
145
|
-
lr=Multiple.listwise(ds.to_dataset,var)
|
146
|
-
1-lr.r2
|
147
|
-
end
|
148
|
-
# Tolerances for each coefficient
|
149
|
-
def coeffs_tolerances
|
150
|
-
@fields.inject({}) {|a,f|
|
151
|
-
a[f]=tolerance(f);
|
152
|
-
a
|
153
|
-
}
|
154
|
-
end
|
155
|
-
# Standard Error for coefficients
|
156
|
-
def coeffs_se
|
157
|
-
out={}
|
158
|
-
mse=sse.quo(df_e)
|
159
|
-
coeffs.each {|k,v|
|
160
|
-
out[k]=Math::sqrt(mse/(@ds[k].sum_of_squares*tolerance(k)))
|
161
|
-
}
|
162
|
-
out
|
163
|
-
end
|
164
|
-
# Estimated Variance-Covariance Matrix
|
165
|
-
# Used for calculation of se of constant
|
166
|
-
def estimated_variance_covariance_matrix
|
167
|
-
mse_p=mse
|
168
|
-
columns=[]
|
169
|
-
@ds_valid.each_vector{|k,v|
|
170
|
-
columns.push(v.data) unless k==@y_var
|
171
|
-
}
|
172
|
-
columns.unshift([1.0]*@ds_valid.cases)
|
173
|
-
x=Matrix.columns(columns)
|
174
|
-
matrix=((x.t*x)).inverse * mse
|
175
|
-
matrix.collect {|i|
|
176
|
-
Math::sqrt(i) if i>0
|
177
|
-
}
|
178
|
-
end
|
179
|
-
# T for constant
|
180
|
-
def constant_t
|
181
|
-
constant.to_f/constant_se
|
182
|
-
end
|
183
|
-
# Standard error for constant
|
184
|
-
def constant_se
|
185
|
-
estimated_variance_covariance_matrix[0,0]
|
186
|
-
end
|
187
|
-
# Retrieves a summary for Regression
|
188
|
-
def summary(report_type=ConsoleSummary)
|
189
|
-
c=coeffs
|
190
|
-
out=""
|
191
|
-
out.extend report_type
|
192
|
-
out.add <<HEREDOC
|
193
|
-
Summary for regression of #{@fields.join(',')} over #{@y_var}
|
194
|
-
*************************************************************
|
195
|
-
Engine: #{self.class}
|
196
|
-
Cases(listwise)=#{@ds.cases}(#{@ds_valid.cases})
|
197
|
-
r=#{sprintf("%0.3f",r)}
|
198
|
-
r2=#{sprintf("%0.3f",r2)}
|
199
|
-
Equation=#{sprintf("%0.3f",constant)}+#{@fields.collect {|k| sprintf("%0.3f%s",c[k],k)}.join(' + ')}
|
200
|
-
HEREDOC
|
201
|
-
|
202
|
-
out.add_line
|
203
|
-
out.add "ANOVA TABLE"
|
204
|
-
|
205
|
-
t=Statsample::ReportTable.new(%w{source ss df ms f s})
|
206
|
-
t.add_row(["Regression", sprintf("%0.3f",ssr), df_r, sprintf("%0.3f",msr), sprintf("%0.3f",f), sprintf("%0.3f", significance)])
|
207
|
-
t.add_row(["Error", sprintf("%0.3f",sse), df_e, sprintf("%0.3f",mse)])
|
208
|
-
|
209
|
-
t.add_row(["Total", sprintf("%0.3f",sst), df_r+df_e])
|
210
30
|
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
cse=coeffs_se
|
217
|
-
t=Statsample::ReportTable.new(%w{coeff b beta se t})
|
218
|
-
t.add_row(["Constant", sprintf("%0.3f", constant), "-", sprintf("%0.3f", constant_se), sprintf("%0.3f", constant_t)])
|
219
|
-
@fields.each{|f|
|
220
|
-
t.add_row([f, sprintf("%0.3f", c[f]), sprintf("%0.3f", sc[f]), sprintf("%0.3f", cse[f]), sprintf("%0.3f", c[f].quo(cse[f]))])
|
221
|
-
}
|
222
|
-
out.parse_table(t)
|
223
|
-
|
224
|
-
rescue
|
31
|
+
# Creates an object for pairwise regression
|
32
|
+
# For now, always retrieves a RubyEngine
|
33
|
+
# lr=Statsample::Regression::Multiple.pairwise(ds,'y')
|
34
|
+
def self.pairwise(ds,y_var)
|
35
|
+
RubyEngine.new(ds,y_var)
|
225
36
|
end
|
226
|
-
|
37
|
+
def self.listwise_by_exp(ds,exp)
|
38
|
+
raise "Not implemented yet"
|
227
39
|
end
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
40
|
+
# Returns a dataset and name of criteria using an expression.
|
41
|
+
# All nominal vectors are replaced by dummy coding
|
42
|
+
# and interactions are calculated
|
43
|
+
|
44
|
+
def self.ds_by_exp(ds,exp)
|
45
|
+
raise "Not implemented"
|
46
|
+
parts=exp.split(/[\+=]/)
|
47
|
+
dependent=parts.pop
|
48
|
+
ds_out=[]
|
49
|
+
parts.each{|p|
|
50
|
+
|
51
|
+
}
|
234
52
|
end
|
235
|
-
|
236
53
|
|
237
|
-
# Deprecated
|
238
|
-
# Sum of squares of error (manual calculation)
|
239
|
-
# using the predicted value minus the y_i value
|
240
|
-
def sse_manual
|
241
|
-
pr=predicted
|
242
|
-
cases=0
|
243
|
-
sse=(0...@ds.cases).inject(0) {|a,i|
|
244
|
-
if !@dy.data_with_nils[i].nil? and !pr[i].nil?
|
245
|
-
cases+=1
|
246
|
-
a+((pr[i]-@dy[i])**2)
|
247
|
-
else
|
248
|
-
a
|
249
|
-
end
|
250
|
-
}
|
251
|
-
sse*(min_n_valid-1.0).quo(cases-1)
|
252
|
-
end
|
253
|
-
# Sum of squares of regression
|
254
|
-
# using the predicted value minus y mean
|
255
|
-
def ssr_direct
|
256
|
-
mean=@dy.mean
|
257
|
-
cases=0
|
258
|
-
ssr=(0...@ds.cases).inject(0) {|a,i|
|
259
|
-
invalid=false
|
260
|
-
v=@dep_columns.collect{|c| invalid=true if c[i].nil?; c[i]}
|
261
|
-
if !invalid
|
262
|
-
cases+=1
|
263
|
-
a+((process(v)-mean)**2)
|
264
|
-
else
|
265
|
-
a
|
266
|
-
end
|
267
|
-
}
|
268
|
-
ssr
|
269
|
-
end
|
270
|
-
def sse_direct
|
271
|
-
sst-ssr
|
272
54
|
end
|
273
|
-
|
274
|
-
c=coeffs
|
275
|
-
total=constant
|
276
|
-
@fields.each_index{|i|
|
277
|
-
total+=c[@fields[i]]*v[i]
|
278
|
-
}
|
279
|
-
total
|
280
|
-
end
|
281
|
-
end
|
282
|
-
end
|
283
|
-
end
|
55
|
+
end
|
284
56
|
end
|
@@ -0,0 +1,235 @@
|
|
1
|
+
module Statsample
|
2
|
+
module Regression
|
3
|
+
module Multiple
|
4
|
+
# Base class for Multiple Regression Engines
|
5
|
+
class BaseEngine
|
6
|
+
def initialize(ds,y_var)
|
7
|
+
@ds=ds
|
8
|
+
@y_var=y_var
|
9
|
+
@r2=nil
|
10
|
+
end
|
11
|
+
|
12
|
+
# Retrieves a vector with predicted values for y
|
13
|
+
def predicted
|
14
|
+
(0...@ds.cases).collect { |i|
|
15
|
+
invalid=false
|
16
|
+
vect=@dep_columns.collect {|v| invalid=true if v[i].nil?; v[i]}
|
17
|
+
if invalid
|
18
|
+
nil
|
19
|
+
else
|
20
|
+
process(vect)
|
21
|
+
end
|
22
|
+
}.to_vector(:scale)
|
23
|
+
end
|
24
|
+
# Retrieves a vector with standarized values for y
|
25
|
+
def standarized_predicted
|
26
|
+
predicted.standarized
|
27
|
+
end
|
28
|
+
# Retrieves a vector with residual values for y
|
29
|
+
def residuals
|
30
|
+
(0...@ds.cases).collect{|i|
|
31
|
+
invalid=false
|
32
|
+
vect=@dep_columns.collect{|v| invalid=true if v[i].nil?; v[i]}
|
33
|
+
if invalid or @ds[@y_var][i].nil?
|
34
|
+
nil
|
35
|
+
else
|
36
|
+
@ds[@y_var][i] - process(vect)
|
37
|
+
end
|
38
|
+
}.to_vector(:scale)
|
39
|
+
end
|
40
|
+
# R Multiple
|
41
|
+
def r
|
42
|
+
raise "You should implement this"
|
43
|
+
end
|
44
|
+
# Sum of squares Total
|
45
|
+
def sst
|
46
|
+
raise "You should implement this"
|
47
|
+
end
|
48
|
+
# Sum of squares (regression)
|
49
|
+
def ssr
|
50
|
+
r2*sst
|
51
|
+
end
|
52
|
+
# Sum of squares (Error)
|
53
|
+
def sse
|
54
|
+
sst - ssr
|
55
|
+
end
|
56
|
+
# T values for coeffs
|
57
|
+
def coeffs_t
|
58
|
+
out={}
|
59
|
+
se=coeffs_se
|
60
|
+
coeffs.each{|k,v|
|
61
|
+
out[k]=v / se[k]
|
62
|
+
}
|
63
|
+
out
|
64
|
+
end
|
65
|
+
# Mean square Regression
|
66
|
+
def msr
|
67
|
+
ssr.quo(df_r)
|
68
|
+
end
|
69
|
+
# Mean Square Error
|
70
|
+
def mse
|
71
|
+
sse.quo(df_e)
|
72
|
+
end
|
73
|
+
# Degrees of freedom for regression
|
74
|
+
def df_r
|
75
|
+
@dep_columns.size
|
76
|
+
end
|
77
|
+
# Degrees of freedom for error
|
78
|
+
def df_e
|
79
|
+
@ds_valid.cases-@dep_columns.size-1
|
80
|
+
end
|
81
|
+
# Fisher for Anova
|
82
|
+
def f
|
83
|
+
(ssr.quo(df_r)).quo(sse.quo(df_e))
|
84
|
+
end
|
85
|
+
# Significance of Fisher
|
86
|
+
def significance
|
87
|
+
1.0-Distribution::F.cdf(f,df_r,df_e)
|
88
|
+
end
|
89
|
+
# Tolerance for a given variable
|
90
|
+
# http://talkstats.com/showthread.php?t=5056
|
91
|
+
def tolerance(var)
|
92
|
+
ds=assign_names(@dep_columns)
|
93
|
+
ds.each{|k,v|
|
94
|
+
ds[k]=v.to_vector(:scale)
|
95
|
+
}
|
96
|
+
lr=Multiple.listwise(ds.to_dataset,var)
|
97
|
+
1-lr.r2
|
98
|
+
end
|
99
|
+
# Tolerances for each coefficient
|
100
|
+
def coeffs_tolerances
|
101
|
+
@fields.inject({}) {|a,f|
|
102
|
+
a[f]=tolerance(f);
|
103
|
+
a
|
104
|
+
}
|
105
|
+
end
|
106
|
+
# Standard Error for coefficients
|
107
|
+
def coeffs_se
|
108
|
+
out={}
|
109
|
+
mse=sse.quo(df_e)
|
110
|
+
coeffs.each {|k,v|
|
111
|
+
out[k]=Math::sqrt(mse/(@ds[k].sum_of_squares*tolerance(k)))
|
112
|
+
}
|
113
|
+
out
|
114
|
+
end
|
115
|
+
# Estimated Variance-Covariance Matrix
|
116
|
+
# Used for calculation of se of constant
|
117
|
+
def estimated_variance_covariance_matrix
|
118
|
+
mse_p=mse
|
119
|
+
columns=[]
|
120
|
+
@ds_valid.each_vector{|k,v|
|
121
|
+
columns.push(v.data) unless k==@y_var
|
122
|
+
}
|
123
|
+
columns.unshift([1.0]*@ds_valid.cases)
|
124
|
+
x=Matrix.columns(columns)
|
125
|
+
matrix=((x.t*x)).inverse * mse
|
126
|
+
matrix.collect {|i|
|
127
|
+
Math::sqrt(i) if i>0
|
128
|
+
}
|
129
|
+
end
|
130
|
+
# T for constant
|
131
|
+
def constant_t
|
132
|
+
constant.to_f/constant_se
|
133
|
+
end
|
134
|
+
# Standard error for constant
|
135
|
+
def constant_se
|
136
|
+
estimated_variance_covariance_matrix[0,0]
|
137
|
+
end
|
138
|
+
# Retrieves a summary for Regression
|
139
|
+
def summary(report_type=ConsoleSummary)
|
140
|
+
c=coeffs
|
141
|
+
out=""
|
142
|
+
out.extend report_type
|
143
|
+
out.add <<HEREDOC
|
144
|
+
Summary for regression of #{@fields.join(',')} over #{@y_var}
|
145
|
+
*************************************************************
|
146
|
+
Engine: #{self.class}
|
147
|
+
Cases(listwise)=#{@ds.cases}(#{@ds_valid.cases})
|
148
|
+
r=#{sprintf("%0.3f",r)}
|
149
|
+
r2=#{sprintf("%0.3f",r2)}
|
150
|
+
Equation=#{sprintf("%0.3f",constant)}+#{@fields.collect {|k| sprintf("%0.3f%s",c[k],k)}.join(' + ')}
|
151
|
+
HEREDOC
|
152
|
+
|
153
|
+
out.add_line
|
154
|
+
out.add "ANOVA TABLE"
|
155
|
+
|
156
|
+
t=Statsample::ReportTable.new(%w{source ss df ms f s})
|
157
|
+
t.add_row(["Regression", sprintf("%0.3f",ssr), df_r, sprintf("%0.3f",msr), sprintf("%0.3f",f), sprintf("%0.3f", significance)])
|
158
|
+
t.add_row(["Error", sprintf("%0.3f",sse), df_e, sprintf("%0.3f",mse)])
|
159
|
+
|
160
|
+
t.add_row(["Total", sprintf("%0.3f",sst), df_r+df_e])
|
161
|
+
|
162
|
+
out.parse_table(t)
|
163
|
+
|
164
|
+
begin
|
165
|
+
out.add "Beta coefficientes"
|
166
|
+
sc=standarized_coeffs
|
167
|
+
cse=coeffs_se
|
168
|
+
t=Statsample::ReportTable.new(%w{coeff b beta se t})
|
169
|
+
t.add_row(["Constant", sprintf("%0.3f", constant), "-", sprintf("%0.3f", constant_se), sprintf("%0.3f", constant_t)])
|
170
|
+
@fields.each{|f|
|
171
|
+
t.add_row([f, sprintf("%0.3f", c[f]), sprintf("%0.3f", sc[f]), sprintf("%0.3f", cse[f]), sprintf("%0.3f", c[f].quo(cse[f]))])
|
172
|
+
}
|
173
|
+
out.parse_table(t)
|
174
|
+
|
175
|
+
rescue
|
176
|
+
end
|
177
|
+
out
|
178
|
+
end
|
179
|
+
def assign_names(c)
|
180
|
+
a={}
|
181
|
+
@fields.each_index {|i|
|
182
|
+
a[@fields[i]]=c[i]
|
183
|
+
}
|
184
|
+
a
|
185
|
+
end
|
186
|
+
|
187
|
+
|
188
|
+
# Deprecated
|
189
|
+
# Sum of squares of error (manual calculation)
|
190
|
+
# using the predicted value minus the y_i value
|
191
|
+
def sse_manual
|
192
|
+
pr=predicted
|
193
|
+
cases=0
|
194
|
+
sse=(0...@ds.cases).inject(0) {|a,i|
|
195
|
+
if !@dy.data_with_nils[i].nil? and !pr[i].nil?
|
196
|
+
cases+=1
|
197
|
+
a+((pr[i]-@dy[i])**2)
|
198
|
+
else
|
199
|
+
a
|
200
|
+
end
|
201
|
+
}
|
202
|
+
sse*(min_n_valid-1.0).quo(cases-1)
|
203
|
+
end
|
204
|
+
# Sum of squares of regression
|
205
|
+
# using the predicted value minus y mean
|
206
|
+
def ssr_direct
|
207
|
+
mean=@dy.mean
|
208
|
+
cases=0
|
209
|
+
ssr=(0...@ds.cases).inject(0) {|a,i|
|
210
|
+
invalid=false
|
211
|
+
v=@dep_columns.collect{|c| invalid=true if c[i].nil?; c[i]}
|
212
|
+
if !invalid
|
213
|
+
cases+=1
|
214
|
+
a+((process(v)-mean)**2)
|
215
|
+
else
|
216
|
+
a
|
217
|
+
end
|
218
|
+
}
|
219
|
+
ssr
|
220
|
+
end
|
221
|
+
def sse_direct
|
222
|
+
sst-ssr
|
223
|
+
end
|
224
|
+
def process(v)
|
225
|
+
c=coeffs
|
226
|
+
total=constant
|
227
|
+
@fields.each_index{|i|
|
228
|
+
total+=c[@fields[i]]*v[i]
|
229
|
+
}
|
230
|
+
total
|
231
|
+
end
|
232
|
+
end
|
233
|
+
end
|
234
|
+
end
|
235
|
+
end
|