statsample 0.6.1 → 0.6.2
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/Manifest.txt +8 -19
- data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
- data/demo/dominance_analysis_bootstrap.rb +20 -0
- data/demo/dominanceanalysis.rb +11 -0
- data/demo/multiple_regression.rb +40 -0
- data/demo/polychoric.rb +13 -0
- data/demo/tetrachoric.rb +10 -0
- data/lib/distribution.rb +1 -0
- data/lib/distribution/normalbivariate.rb +100 -0
- data/lib/statsample.rb +4 -105
- data/lib/statsample/bivariate.rb +5 -1
- data/lib/statsample/bivariate/polychoric.rb +581 -0
- data/lib/statsample/bivariate/tetrachoric.rb +37 -5
- data/lib/statsample/converters.rb +11 -0
- data/lib/statsample/dominanceanalysis.rb +104 -90
- data/lib/statsample/dominanceanalysis/bootstrap.rb +160 -131
- data/lib/statsample/factor/pca.rb +1 -2
- data/lib/statsample/factor/principalaxis.rb +2 -2
- data/lib/statsample/graph/svghistogram.rb +170 -172
- data/lib/statsample/matrix.rb +79 -0
- data/lib/statsample/mle.rb +6 -4
- data/lib/statsample/mle/probit.rb +0 -1
- data/lib/statsample/regression/multiple/alglibengine.rb +23 -23
- data/lib/statsample/regression/multiple/baseengine.rb +112 -113
- data/lib/statsample/regression/multiple/gslengine.rb +91 -94
- data/lib/statsample/regression/multiple/rubyengine.rb +1 -1
- data/lib/statsample/srs.rb +1 -1
- data/lib/statsample/test.rb +0 -1
- data/lib/statsample/test/umannwhitney.rb +8 -5
- data/po/es/statsample.po +201 -39
- data/po/statsample.pot +184 -32
- data/test/test_bivariate.rb +21 -2
- data/test/test_distribution.rb +58 -40
- data/test/test_factor.rb +0 -1
- data/test/test_gsl.rb +13 -14
- data/test/test_regression.rb +1 -1
- data/test/test_statistics.rb +1 -4
- metadata +10 -21
- data/demo/benchmark.rb +0 -76
- data/demo/chi-square.rb +0 -44
- data/demo/crosstab.rb +0 -7
- data/demo/dice.rb +0 -13
- data/demo/distribution_t.rb +0 -95
- data/demo/graph.rb +0 -9
- data/demo/item_analysis.rb +0 -30
- data/demo/mean.rb +0 -81
- data/demo/nunnally_6.rb +0 -34
- data/demo/pca.rb +0 -29
- data/demo/proportion.rb +0 -57
- data/demo/regression.rb +0 -82
- data/demo/sample_test.csv +0 -113
- data/demo/spss_matrix.rb +0 -3
- data/demo/strata_proportion.rb +0 -152
- data/demo/stratum.rb +0 -141
- data/demo/t-student.rb +0 -17
- data/demo/umann.rb +0 -8
- data/lib/matrix_extension.rb +0 -92
data/lib/statsample/mle.rb
CHANGED
@@ -10,7 +10,7 @@ module Statsample
|
|
10
10
|
# iterations=mle.iterations
|
11
11
|
#
|
12
12
|
module MLE
|
13
|
-
|
13
|
+
class BaseMLE
|
14
14
|
attr_accessor :verbose
|
15
15
|
attr_accessor :output
|
16
16
|
# Could be :parameters or :mle
|
@@ -40,7 +40,7 @@ module Statsample
|
|
40
40
|
xi=Matrix.rows([x.row(i).to_a.collect{|v| v.to_f}])
|
41
41
|
y_val=y[i,0].to_f
|
42
42
|
fbx=f(b,x)
|
43
|
-
prod=prod*likehood_i(xi,y_val,b)
|
43
|
+
prod=prod*likehood_i(xi, y_val ,b)
|
44
44
|
}
|
45
45
|
prod
|
46
46
|
end
|
@@ -54,6 +54,8 @@ module Statsample
|
|
54
54
|
}
|
55
55
|
sum
|
56
56
|
end
|
57
|
+
|
58
|
+
|
57
59
|
# Creates a zero matrix Mx1, with M=x.M
|
58
60
|
def set_default_parameters(x)
|
59
61
|
fd=[0.0]*x.column_size
|
@@ -82,9 +84,9 @@ module Statsample
|
|
82
84
|
h=nil
|
83
85
|
fd=nil
|
84
86
|
if @stop_criteria==:mle
|
85
|
-
|
87
|
+
old_likehood=log_likehood(x, y, parameters)
|
86
88
|
else
|
87
|
-
|
89
|
+
old_parameters=parameters
|
88
90
|
end
|
89
91
|
ITERATIONS.times do |i|
|
90
92
|
@iterations=i+1
|
@@ -17,29 +17,29 @@ module Multiple
|
|
17
17
|
# lr=Statsample::Regression::Multiple::AlglibEngine.new(ds,'y')
|
18
18
|
#
|
19
19
|
class AlglibEngine < BaseEngine
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
20
|
+
def initialize(ds,y_var, opts=Hash.new)
|
21
|
+
super
|
22
|
+
@ds=ds.dup_only_valid
|
23
|
+
@ds_valid=@ds
|
24
|
+
@dy=@ds[@y_var]
|
25
|
+
@ds_indep=ds.dup(ds.fields-[y_var])
|
26
|
+
# Create a custom matrix
|
27
|
+
columns=[]
|
28
|
+
@fields=[]
|
29
|
+
@ds.fields.each{|f|
|
30
|
+
if f!=@y_var
|
31
|
+
columns.push(@ds[f].to_a)
|
32
|
+
@fields.push(f)
|
33
|
+
end
|
34
|
+
}
|
35
|
+
@dep_columns=columns.dup
|
36
|
+
columns.push(@ds[@y_var])
|
37
|
+
matrix=Matrix.columns(columns)
|
38
|
+
@lr_s=nil
|
39
|
+
@lr=::Alglib::LinearRegression.build_from_matrix(matrix)
|
40
|
+
@coeffs=assign_names(@lr.coeffs)
|
41
|
+
|
42
|
+
end
|
43
43
|
|
44
44
|
def _dump(i)
|
45
45
|
Marshal.dump({'ds'=>@ds,'y_var'=>@y_var})
|
@@ -3,188 +3,187 @@ module Statsample
|
|
3
3
|
module Multiple
|
4
4
|
# Base class for Multiple Regression Engines
|
5
5
|
class BaseEngine
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
6
|
+
include GetText
|
7
|
+
bindtextdomain("statsample")
|
8
|
+
# Name of analysis
|
9
|
+
attr_accessor :name
|
10
|
+
def initialize(ds, y_var, opts = Hash.new)
|
11
|
+
@ds=ds
|
12
|
+
@y_var=y_var
|
13
|
+
@r2=nil
|
14
|
+
@name=_("Multiple Regression: %s over %s") % [ ds.fields.join(",") , @y_var]
|
15
|
+
opts.each{|k,v|
|
16
|
+
self.send("#{k}=",v) if self.respond_to? k
|
17
|
+
}
|
10
18
|
end
|
11
19
|
|
12
20
|
# Retrieves a vector with predicted values for y
|
13
21
|
def predicted
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
22
|
+
(0...@ds.cases).collect { |i|
|
23
|
+
invalid=false
|
24
|
+
vect=@dep_columns.collect {|v| invalid=true if v[i].nil?; v[i]}
|
25
|
+
if invalid
|
26
|
+
nil
|
27
|
+
else
|
28
|
+
process(vect)
|
29
|
+
end
|
30
|
+
}.to_vector(:scale)
|
23
31
|
end
|
24
32
|
# Retrieves a vector with standarized values for y
|
25
33
|
def standarized_predicted
|
26
|
-
|
34
|
+
predicted.standarized
|
27
35
|
end
|
28
36
|
# Retrieves a vector with residuals values for y
|
29
37
|
def residuals
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
38
|
+
(0...@ds.cases).collect{|i|
|
39
|
+
invalid=false
|
40
|
+
vect=@dep_columns.collect{|v| invalid=true if v[i].nil?; v[i]}
|
41
|
+
if invalid or @ds[@y_var][i].nil?
|
42
|
+
nil
|
43
|
+
else
|
44
|
+
@ds[@y_var][i] - process(vect)
|
45
|
+
end
|
46
|
+
}.to_vector(:scale)
|
39
47
|
end
|
40
48
|
# R Multiple
|
41
49
|
def r
|
42
|
-
|
50
|
+
raise "You should implement this"
|
43
51
|
end
|
44
52
|
# Sum of squares Total
|
45
53
|
def sst
|
46
|
-
|
54
|
+
raise "You should implement this"
|
47
55
|
end
|
48
56
|
# Sum of squares (regression)
|
49
57
|
def ssr
|
50
|
-
|
58
|
+
r2*sst
|
51
59
|
end
|
52
60
|
# Sum of squares (Error)
|
53
61
|
def sse
|
54
|
-
|
62
|
+
sst - ssr
|
55
63
|
end
|
56
64
|
# T values for coeffs
|
57
65
|
def coeffs_t
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
66
|
+
out={}
|
67
|
+
se=coeffs_se
|
68
|
+
coeffs.each do |k,v|
|
69
|
+
out[k]=v / se[k]
|
70
|
+
end
|
71
|
+
out
|
64
72
|
end
|
65
73
|
# Mean square Regression
|
66
74
|
def msr
|
67
|
-
|
75
|
+
ssr.quo(df_r)
|
68
76
|
end
|
69
77
|
# Mean Square Error
|
70
78
|
def mse
|
71
|
-
|
79
|
+
sse.quo(df_e)
|
72
80
|
end
|
73
81
|
# Degrees of freedom for regression
|
74
82
|
def df_r
|
75
|
-
|
83
|
+
@dep_columns.size
|
76
84
|
end
|
77
85
|
# Degrees of freedom for error
|
78
86
|
def df_e
|
79
|
-
|
87
|
+
@ds_valid.cases-@dep_columns.size-1
|
80
88
|
end
|
81
89
|
# Fisher for Anova
|
82
90
|
def f
|
83
|
-
|
91
|
+
(ssr.quo(df_r)).quo(sse.quo(df_e))
|
84
92
|
end
|
85
93
|
# Significance of Fisher
|
86
94
|
def significance
|
87
|
-
|
95
|
+
1.0-Distribution::F.cdf(f,df_r,df_e)
|
88
96
|
end
|
89
97
|
# Tolerance for a given variable
|
90
98
|
# http://talkstats.com/showthread.php?t=5056
|
91
99
|
def tolerance(var)
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
100
|
+
ds=assign_names(@dep_columns)
|
101
|
+
ds.each{|k,v|
|
102
|
+
ds[k]=v.to_vector(:scale)
|
103
|
+
}
|
104
|
+
lr=Multiple.listwise(ds.to_dataset,var)
|
105
|
+
1-lr.r2
|
98
106
|
end
|
99
107
|
# Tolerances for each coefficient
|
100
108
|
def coeffs_tolerances
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
109
|
+
@fields.inject({}) {|a,f|
|
110
|
+
a[f]=tolerance(f);
|
111
|
+
a
|
112
|
+
}
|
105
113
|
end
|
106
114
|
# Standard Error for coefficients
|
107
115
|
def coeffs_se
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
116
|
+
out={}
|
117
|
+
mse=sse.quo(df_e)
|
118
|
+
coeffs.each {|k,v|
|
119
|
+
out[k]=Math::sqrt(mse/(@ds[k].sum_of_squares*tolerance(k)))
|
120
|
+
}
|
121
|
+
out
|
114
122
|
end
|
115
123
|
# Estimated Variance-Covariance Matrix
|
116
124
|
# Used for calculation of se of constant
|
117
125
|
def estimated_variance_covariance_matrix
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
Math::sqrt(i) if i>0
|
128
|
-
}
|
126
|
+
mse_p=mse
|
127
|
+
columns=[]
|
128
|
+
@ds_valid.each_vector{|k,v|
|
129
|
+
columns.push(v.data) unless k==@y_var
|
130
|
+
}
|
131
|
+
columns.unshift([1.0]*@ds_valid.cases)
|
132
|
+
x=Matrix.columns(columns)
|
133
|
+
matrix=((x.t*x)).inverse * mse
|
134
|
+
matrix.collect {|i| Math::sqrt(i) if i>0 }
|
129
135
|
end
|
130
136
|
# T for constant
|
131
137
|
def constant_t
|
132
|
-
|
138
|
+
constant.to_f/constant_se
|
133
139
|
end
|
134
140
|
# Standard error for constant
|
135
141
|
def constant_se
|
136
|
-
|
137
|
-
end
|
138
|
-
# Retrieves a summary for Regression
|
139
|
-
def summary(report_type=ConsoleSummary)
|
140
|
-
c=coeffs
|
141
|
-
out=""
|
142
|
-
out.extend report_type
|
143
|
-
out.add <<-HEREDOC
|
144
|
-
Summary for regression of #{@fields.join(',')} over #{@y_var}
|
145
|
-
*************************************************************
|
146
|
-
Engine: #{self.class}
|
147
|
-
Cases(listwise)=#{@ds.cases}(#{@ds_valid.cases})
|
148
|
-
r=#{sprintf("%0.3f",r)}
|
149
|
-
r2=#{sprintf("%0.3f",r2)}
|
150
|
-
Equation=#{sprintf("%0.3f",constant)}+#{@fields.collect {|k| sprintf("%0.3f%s",c[k],k)}.join(' + ')}
|
151
|
-
HEREDOC
|
152
|
-
|
153
|
-
out.add_line
|
154
|
-
out.add "ANOVA TABLE"
|
155
|
-
|
156
|
-
t=Statsample::ReportTable.new(%w{source ss df ms f s})
|
157
|
-
t.add_row(["Regression", sprintf("%0.3f",ssr), df_r, sprintf("%0.3f",msr), sprintf("%0.3f",f), sprintf("%0.3f", significance)])
|
158
|
-
t.add_row(["Error", sprintf("%0.3f",sse), df_e, sprintf("%0.3f",mse)])
|
159
|
-
|
160
|
-
t.add_row(["Total", sprintf("%0.3f",sst), df_r+df_e])
|
161
|
-
|
162
|
-
out.parse_table(t)
|
163
|
-
|
164
|
-
begin
|
165
|
-
out.add "Beta coefficientes"
|
166
|
-
sc=standarized_coeffs
|
167
|
-
cse=coeffs_se
|
168
|
-
t=Statsample::ReportTable.new(%w{coeff b beta se t})
|
169
|
-
t.add_row(["Constant", sprintf("%0.3f", constant), "-", sprintf("%0.3f", constant_se), sprintf("%0.3f", constant_t)])
|
170
|
-
@fields.each{|f|
|
171
|
-
t.add_row([f, sprintf("%0.3f", c[f]), sprintf("%0.3f", sc[f]), sprintf("%0.3f", cse[f]), sprintf("%0.3f", c[f].quo(cse[f]))])
|
172
|
-
}
|
173
|
-
out.parse_table(t)
|
174
|
-
|
175
|
-
rescue
|
142
|
+
estimated_variance_covariance_matrix[0,0]
|
176
143
|
end
|
177
|
-
|
144
|
+
def summary
|
145
|
+
rp=ReportBuilder.new()
|
146
|
+
rp.add(self)
|
147
|
+
rp.to_text
|
178
148
|
end
|
149
|
+
def to_reportbuilder(generator)
|
150
|
+
anchor=generator.add_toc_entry(_("Multiple Regression: ")+@name)
|
151
|
+
generator.add_html "<div class='multiple-regression'>#{@name}<a name='#{anchor}'></a>"
|
152
|
+
c=coeffs
|
153
|
+
generator.add_text(_("Engine: %s") % self.class)
|
154
|
+
generator.add_text(_("Cases(listwise)=%d(%d)") % [@ds.cases, @ds_valid.cases])
|
155
|
+
generator.add_text("r=#{sprintf('%0.3f',r)}")
|
156
|
+
generator.add_text("r=#{sprintf('%0.3f',r2)}")
|
157
|
+
|
158
|
+
generator.add_text(_("Equation")+"="+ sprintf('%0.3f',constant) +" + "+ @fields.collect {|k| sprintf('%0.3f%s',c[k],k)}.join(' + ') )
|
159
|
+
|
160
|
+
t=ReportBuilder::Table.new(:name=>"ANOVA", :header=>%w{source ss df ms f s})
|
161
|
+
t.add_row([_("Regression"), sprintf("%0.3f",ssr), df_r, sprintf("%0.3f",msr), sprintf("%0.3f",f), sprintf("%0.3f", significance)])
|
162
|
+
t.add_row([_("Error"), sprintf("%0.3f",sse), df_e, sprintf("%0.3f",mse)])
|
163
|
+
|
164
|
+
t.add_row([_("Total"), sprintf("%0.3f",sst), df_r+df_e])
|
165
|
+
generator.parse_element(t)
|
166
|
+
sc=standarized_coeffs
|
167
|
+
cse=coeffs_se
|
168
|
+
t=ReportBuilder::Table.new(:name=>"Beta coefficients", :header=>%w{coeff b beta se t}.collect{|field| _(field)} )
|
169
|
+
|
170
|
+
t.add_row([_("Constant"), sprintf("%0.3f", constant), "-", sprintf("%0.3f", constant_se), sprintf("%0.3f", constant_t)])
|
171
|
+
@fields.each do |f|
|
172
|
+
t.add_row([f, sprintf("%0.3f", c[f]), sprintf("%0.3f", sc[f]), sprintf("%0.3f", cse[f]), sprintf("%0.3f", c[f].quo(cse[f]))])
|
173
|
+
end
|
174
|
+
generator.parse_element(t)
|
175
|
+
generator.add_html("</div>")
|
176
|
+
end
|
177
|
+
|
178
|
+
|
179
179
|
def assign_names(c)
|
180
180
|
a={}
|
181
181
|
@fields.each_index {|i|
|
182
|
-
|
182
|
+
a[@fields[i]]=c[i]
|
183
183
|
}
|
184
184
|
a
|
185
185
|
end
|
186
186
|
|
187
|
-
|
188
187
|
# Deprecated
|
189
188
|
# Sum of squares of error (manual calculation)
|
190
189
|
# using the predicted value minus the y_i value
|
@@ -193,10 +192,10 @@ HEREDOC
|
|
193
192
|
cases=0
|
194
193
|
sse=(0...@ds.cases).inject(0) {|a,i|
|
195
194
|
if !@dy.data_with_nils[i].nil? and !pr[i].nil?
|
196
|
-
|
197
|
-
|
195
|
+
cases+=1
|
196
|
+
a+((pr[i]-@dy[i])**2)
|
198
197
|
else
|
199
|
-
|
198
|
+
a
|
200
199
|
end
|
201
200
|
}
|
202
201
|
sse*(min_n_valid-1.0).quo(cases-1)
|
@@ -17,105 +17,102 @@ module Multiple
|
|
17
17
|
# lr=Statsample::Regression::Multiple::GslEngine.new(ds,'y')
|
18
18
|
#
|
19
19
|
class GslEngine < BaseEngine
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
end
|
34
|
-
j=0
|
35
|
-
@ds.fields.each{|f|
|
36
|
-
if f!=@y_var
|
37
|
-
@ds[f].each_index{|i1|
|
38
|
-
max_deps.set(i1,j,@ds[f][i1])
|
39
|
-
}
|
40
|
-
columns.push(@ds[f].to_a)
|
41
|
-
@fields.push(f)
|
42
|
-
j+=1
|
43
|
-
end
|
44
|
-
}
|
45
|
-
@dep_columns=columns.dup
|
46
|
-
@lr_s=nil
|
47
|
-
c, @cov, @chisq, @status = GSL::MultiFit.linear(max_deps, @dy.gsl)
|
48
|
-
@constant=c[constant_col]
|
49
|
-
@coeffs_a=c.to_a.slice(0...constant_col)
|
50
|
-
@coeffs=assign_names(@coeffs_a)
|
51
|
-
c=nil
|
52
|
-
end
|
53
|
-
|
54
|
-
def _dump(i)
|
55
|
-
Marshal.dump({'ds'=>@ds,'y_var'=>@y_var})
|
56
|
-
end
|
57
|
-
def self._load(data)
|
58
|
-
h=Marshal.load(data)
|
59
|
-
self.new(h['ds'], h['y_var'])
|
60
|
-
end
|
61
|
-
|
62
|
-
def coeffs
|
63
|
-
@coeffs
|
64
|
-
end
|
65
|
-
# Coefficients using a constant
|
66
|
-
# Based on http://www.xycoon.com/ols1.htm
|
67
|
-
def matrix_resolution
|
68
|
-
mse_p=mse
|
69
|
-
columns=@dep_columns.dup.map {|xi| xi.map{|i| i.to_f}}
|
70
|
-
columns.unshift([1.0]*@ds.cases)
|
71
|
-
y=Matrix.columns([@dy.data.map {|i| i.to_f}])
|
72
|
-
x=Matrix.columns(columns)
|
73
|
-
xt=x.t
|
74
|
-
matrix=((xt*x)).inverse*xt
|
75
|
-
matrix*y
|
76
|
-
end
|
77
|
-
def r2
|
78
|
-
r**2
|
20
|
+
def initialize(ds,y_var, opts=Hash.new)
|
21
|
+
super
|
22
|
+
@ds=ds.dup_only_valid
|
23
|
+
@ds_valid=@ds
|
24
|
+
@dy=@ds[@y_var]
|
25
|
+
@ds_indep=ds.dup(ds.fields-[y_var])
|
26
|
+
# Create a custom matrix
|
27
|
+
columns=[]
|
28
|
+
@fields=[]
|
29
|
+
max_deps = GSL::Matrix.alloc(@ds.cases, @ds.fields.size)
|
30
|
+
constant_col=@ds.fields.size-1
|
31
|
+
for i in 0...@ds.cases
|
32
|
+
max_deps.set(i,constant_col,1)
|
79
33
|
end
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
def standarized_coeffs
|
90
|
-
l=lr_s
|
91
|
-
l.coeffs
|
92
|
-
end
|
93
|
-
def lr_s
|
94
|
-
if @lr_s.nil?
|
95
|
-
build_standarized
|
34
|
+
j=0
|
35
|
+
@ds.fields.each{|f|
|
36
|
+
if f!=@y_var
|
37
|
+
@ds[f].each_index{|i1|
|
38
|
+
max_deps.set(i1,j,@ds[f][i1])
|
39
|
+
}
|
40
|
+
columns.push(@ds[f].to_a)
|
41
|
+
@fields.push(f)
|
42
|
+
j+=1
|
96
43
|
end
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
44
|
+
}
|
45
|
+
@dep_columns=columns.dup
|
46
|
+
@lr_s=nil
|
47
|
+
c, @cov, @chisq, @status = GSL::MultiFit.linear(max_deps, @dy.gsl)
|
48
|
+
@constant=c[constant_col]
|
49
|
+
@coeffs_a=c.to_a.slice(0...constant_col)
|
50
|
+
@coeffs=assign_names(@coeffs_a)
|
51
|
+
c=nil
|
52
|
+
end
|
53
|
+
|
54
|
+
def _dump(i)
|
55
|
+
Marshal.dump({'ds'=>@ds,'y_var'=>@y_var})
|
56
|
+
end
|
57
|
+
def self._load(data)
|
58
|
+
h=Marshal.load(data)
|
59
|
+
self.new(h['ds'], h['y_var'])
|
60
|
+
end
|
61
|
+
|
62
|
+
def coeffs
|
63
|
+
@coeffs
|
64
|
+
end
|
65
|
+
# Coefficients using a constant
|
66
|
+
# Based on http://www.xycoon.com/ols1.htm
|
67
|
+
def matrix_resolution
|
68
|
+
mse_p=mse
|
69
|
+
columns=@dep_columns.dup.map {|xi| xi.map{|i| i.to_f}}
|
70
|
+
columns.unshift([1.0]*@ds.cases)
|
71
|
+
y=Matrix.columns([@dy.data.map {|i| i.to_f}])
|
72
|
+
x=Matrix.columns(columns)
|
73
|
+
xt=x.t
|
74
|
+
matrix=((xt*x)).inverse*xt
|
75
|
+
matrix*y
|
76
|
+
end
|
77
|
+
def r2
|
78
|
+
r**2
|
79
|
+
end
|
80
|
+
def r
|
81
|
+
Bivariate::pearson(@dy, predicted)
|
82
|
+
end
|
83
|
+
def sst
|
84
|
+
@dy.ss
|
85
|
+
end
|
86
|
+
def constant
|
87
|
+
@constant
|
88
|
+
end
|
89
|
+
def standarized_coeffs
|
90
|
+
l=lr_s
|
91
|
+
l.coeffs
|
92
|
+
end
|
93
|
+
def lr_s
|
94
|
+
if @lr_s.nil?
|
95
|
+
build_standarized
|
113
96
|
end
|
97
|
+
@lr_s
|
98
|
+
end
|
99
|
+
def build_standarized
|
100
|
+
@ds_s=@ds.standarize
|
101
|
+
@lr_s=GslEngine.new(@ds_s,@y_var)
|
102
|
+
end
|
103
|
+
def process_s(v)
|
104
|
+
lr_s.process(v)
|
105
|
+
end
|
106
|
+
# ???? Not equal to SPSS output
|
107
|
+
def standarized_residuals
|
108
|
+
res=residuals
|
109
|
+
red_sd=residuals.sds
|
110
|
+
res.collect {|v|
|
111
|
+
v.quo(red_sd)
|
112
|
+
}.to_vector(:scale)
|
113
|
+
end
|
114
114
|
end
|
115
115
|
end
|
116
116
|
end
|
117
117
|
end # for Statsample
|
118
118
|
end # for if
|
119
|
-
|
120
|
-
|
121
|
-
|