statsample 0.6.1 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/Manifest.txt +8 -19
- data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
- data/demo/dominance_analysis_bootstrap.rb +20 -0
- data/demo/dominanceanalysis.rb +11 -0
- data/demo/multiple_regression.rb +40 -0
- data/demo/polychoric.rb +13 -0
- data/demo/tetrachoric.rb +10 -0
- data/lib/distribution.rb +1 -0
- data/lib/distribution/normalbivariate.rb +100 -0
- data/lib/statsample.rb +4 -105
- data/lib/statsample/bivariate.rb +5 -1
- data/lib/statsample/bivariate/polychoric.rb +581 -0
- data/lib/statsample/bivariate/tetrachoric.rb +37 -5
- data/lib/statsample/converters.rb +11 -0
- data/lib/statsample/dominanceanalysis.rb +104 -90
- data/lib/statsample/dominanceanalysis/bootstrap.rb +160 -131
- data/lib/statsample/factor/pca.rb +1 -2
- data/lib/statsample/factor/principalaxis.rb +2 -2
- data/lib/statsample/graph/svghistogram.rb +170 -172
- data/lib/statsample/matrix.rb +79 -0
- data/lib/statsample/mle.rb +6 -4
- data/lib/statsample/mle/probit.rb +0 -1
- data/lib/statsample/regression/multiple/alglibengine.rb +23 -23
- data/lib/statsample/regression/multiple/baseengine.rb +112 -113
- data/lib/statsample/regression/multiple/gslengine.rb +91 -94
- data/lib/statsample/regression/multiple/rubyengine.rb +1 -1
- data/lib/statsample/srs.rb +1 -1
- data/lib/statsample/test.rb +0 -1
- data/lib/statsample/test/umannwhitney.rb +8 -5
- data/po/es/statsample.po +201 -39
- data/po/statsample.pot +184 -32
- data/test/test_bivariate.rb +21 -2
- data/test/test_distribution.rb +58 -40
- data/test/test_factor.rb +0 -1
- data/test/test_gsl.rb +13 -14
- data/test/test_regression.rb +1 -1
- data/test/test_statistics.rb +1 -4
- metadata +10 -21
- data/demo/benchmark.rb +0 -76
- data/demo/chi-square.rb +0 -44
- data/demo/crosstab.rb +0 -7
- data/demo/dice.rb +0 -13
- data/demo/distribution_t.rb +0 -95
- data/demo/graph.rb +0 -9
- data/demo/item_analysis.rb +0 -30
- data/demo/mean.rb +0 -81
- data/demo/nunnally_6.rb +0 -34
- data/demo/pca.rb +0 -29
- data/demo/proportion.rb +0 -57
- data/demo/regression.rb +0 -82
- data/demo/sample_test.csv +0 -113
- data/demo/spss_matrix.rb +0 -3
- data/demo/strata_proportion.rb +0 -152
- data/demo/stratum.rb +0 -141
- data/demo/t-student.rb +0 -17
- data/demo/umann.rb +0 -8
- data/lib/matrix_extension.rb +0 -92
data/lib/statsample/mle.rb
CHANGED
@@ -10,7 +10,7 @@ module Statsample
|
|
10
10
|
# iterations=mle.iterations
|
11
11
|
#
|
12
12
|
module MLE
|
13
|
-
|
13
|
+
class BaseMLE
|
14
14
|
attr_accessor :verbose
|
15
15
|
attr_accessor :output
|
16
16
|
# Could be :parameters or :mle
|
@@ -40,7 +40,7 @@ module Statsample
|
|
40
40
|
xi=Matrix.rows([x.row(i).to_a.collect{|v| v.to_f}])
|
41
41
|
y_val=y[i,0].to_f
|
42
42
|
fbx=f(b,x)
|
43
|
-
prod=prod*likehood_i(xi,y_val,b)
|
43
|
+
prod=prod*likehood_i(xi, y_val ,b)
|
44
44
|
}
|
45
45
|
prod
|
46
46
|
end
|
@@ -54,6 +54,8 @@ module Statsample
|
|
54
54
|
}
|
55
55
|
sum
|
56
56
|
end
|
57
|
+
|
58
|
+
|
57
59
|
# Creates a zero matrix Mx1, with M=x.M
|
58
60
|
def set_default_parameters(x)
|
59
61
|
fd=[0.0]*x.column_size
|
@@ -82,9 +84,9 @@ module Statsample
|
|
82
84
|
h=nil
|
83
85
|
fd=nil
|
84
86
|
if @stop_criteria==:mle
|
85
|
-
|
87
|
+
old_likehood=log_likehood(x, y, parameters)
|
86
88
|
else
|
87
|
-
|
89
|
+
old_parameters=parameters
|
88
90
|
end
|
89
91
|
ITERATIONS.times do |i|
|
90
92
|
@iterations=i+1
|
@@ -17,29 +17,29 @@ module Multiple
|
|
17
17
|
# lr=Statsample::Regression::Multiple::AlglibEngine.new(ds,'y')
|
18
18
|
#
|
19
19
|
class AlglibEngine < BaseEngine
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
20
|
+
def initialize(ds,y_var, opts=Hash.new)
|
21
|
+
super
|
22
|
+
@ds=ds.dup_only_valid
|
23
|
+
@ds_valid=@ds
|
24
|
+
@dy=@ds[@y_var]
|
25
|
+
@ds_indep=ds.dup(ds.fields-[y_var])
|
26
|
+
# Create a custom matrix
|
27
|
+
columns=[]
|
28
|
+
@fields=[]
|
29
|
+
@ds.fields.each{|f|
|
30
|
+
if f!=@y_var
|
31
|
+
columns.push(@ds[f].to_a)
|
32
|
+
@fields.push(f)
|
33
|
+
end
|
34
|
+
}
|
35
|
+
@dep_columns=columns.dup
|
36
|
+
columns.push(@ds[@y_var])
|
37
|
+
matrix=Matrix.columns(columns)
|
38
|
+
@lr_s=nil
|
39
|
+
@lr=::Alglib::LinearRegression.build_from_matrix(matrix)
|
40
|
+
@coeffs=assign_names(@lr.coeffs)
|
41
|
+
|
42
|
+
end
|
43
43
|
|
44
44
|
def _dump(i)
|
45
45
|
Marshal.dump({'ds'=>@ds,'y_var'=>@y_var})
|
@@ -3,188 +3,187 @@ module Statsample
|
|
3
3
|
module Multiple
|
4
4
|
# Base class for Multiple Regression Engines
|
5
5
|
class BaseEngine
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
6
|
+
include GetText
|
7
|
+
bindtextdomain("statsample")
|
8
|
+
# Name of analysis
|
9
|
+
attr_accessor :name
|
10
|
+
def initialize(ds, y_var, opts = Hash.new)
|
11
|
+
@ds=ds
|
12
|
+
@y_var=y_var
|
13
|
+
@r2=nil
|
14
|
+
@name=_("Multiple Regression: %s over %s") % [ ds.fields.join(",") , @y_var]
|
15
|
+
opts.each{|k,v|
|
16
|
+
self.send("#{k}=",v) if self.respond_to? k
|
17
|
+
}
|
10
18
|
end
|
11
19
|
|
12
20
|
# Retrieves a vector with predicted values for y
|
13
21
|
def predicted
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
22
|
+
(0...@ds.cases).collect { |i|
|
23
|
+
invalid=false
|
24
|
+
vect=@dep_columns.collect {|v| invalid=true if v[i].nil?; v[i]}
|
25
|
+
if invalid
|
26
|
+
nil
|
27
|
+
else
|
28
|
+
process(vect)
|
29
|
+
end
|
30
|
+
}.to_vector(:scale)
|
23
31
|
end
|
24
32
|
# Retrieves a vector with standarized values for y
|
25
33
|
def standarized_predicted
|
26
|
-
|
34
|
+
predicted.standarized
|
27
35
|
end
|
28
36
|
# Retrieves a vector with residuals values for y
|
29
37
|
def residuals
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
38
|
+
(0...@ds.cases).collect{|i|
|
39
|
+
invalid=false
|
40
|
+
vect=@dep_columns.collect{|v| invalid=true if v[i].nil?; v[i]}
|
41
|
+
if invalid or @ds[@y_var][i].nil?
|
42
|
+
nil
|
43
|
+
else
|
44
|
+
@ds[@y_var][i] - process(vect)
|
45
|
+
end
|
46
|
+
}.to_vector(:scale)
|
39
47
|
end
|
40
48
|
# R Multiple
|
41
49
|
def r
|
42
|
-
|
50
|
+
raise "You should implement this"
|
43
51
|
end
|
44
52
|
# Sum of squares Total
|
45
53
|
def sst
|
46
|
-
|
54
|
+
raise "You should implement this"
|
47
55
|
end
|
48
56
|
# Sum of squares (regression)
|
49
57
|
def ssr
|
50
|
-
|
58
|
+
r2*sst
|
51
59
|
end
|
52
60
|
# Sum of squares (Error)
|
53
61
|
def sse
|
54
|
-
|
62
|
+
sst - ssr
|
55
63
|
end
|
56
64
|
# T values for coeffs
|
57
65
|
def coeffs_t
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
66
|
+
out={}
|
67
|
+
se=coeffs_se
|
68
|
+
coeffs.each do |k,v|
|
69
|
+
out[k]=v / se[k]
|
70
|
+
end
|
71
|
+
out
|
64
72
|
end
|
65
73
|
# Mean square Regression
|
66
74
|
def msr
|
67
|
-
|
75
|
+
ssr.quo(df_r)
|
68
76
|
end
|
69
77
|
# Mean Square Error
|
70
78
|
def mse
|
71
|
-
|
79
|
+
sse.quo(df_e)
|
72
80
|
end
|
73
81
|
# Degrees of freedom for regression
|
74
82
|
def df_r
|
75
|
-
|
83
|
+
@dep_columns.size
|
76
84
|
end
|
77
85
|
# Degrees of freedom for error
|
78
86
|
def df_e
|
79
|
-
|
87
|
+
@ds_valid.cases-@dep_columns.size-1
|
80
88
|
end
|
81
89
|
# Fisher for Anova
|
82
90
|
def f
|
83
|
-
|
91
|
+
(ssr.quo(df_r)).quo(sse.quo(df_e))
|
84
92
|
end
|
85
93
|
# Significance of Fisher
|
86
94
|
def significance
|
87
|
-
|
95
|
+
1.0-Distribution::F.cdf(f,df_r,df_e)
|
88
96
|
end
|
89
97
|
# Tolerance for a given variable
|
90
98
|
# http://talkstats.com/showthread.php?t=5056
|
91
99
|
def tolerance(var)
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
100
|
+
ds=assign_names(@dep_columns)
|
101
|
+
ds.each{|k,v|
|
102
|
+
ds[k]=v.to_vector(:scale)
|
103
|
+
}
|
104
|
+
lr=Multiple.listwise(ds.to_dataset,var)
|
105
|
+
1-lr.r2
|
98
106
|
end
|
99
107
|
# Tolerances for each coefficient
|
100
108
|
def coeffs_tolerances
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
109
|
+
@fields.inject({}) {|a,f|
|
110
|
+
a[f]=tolerance(f);
|
111
|
+
a
|
112
|
+
}
|
105
113
|
end
|
106
114
|
# Standard Error for coefficients
|
107
115
|
def coeffs_se
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
116
|
+
out={}
|
117
|
+
mse=sse.quo(df_e)
|
118
|
+
coeffs.each {|k,v|
|
119
|
+
out[k]=Math::sqrt(mse/(@ds[k].sum_of_squares*tolerance(k)))
|
120
|
+
}
|
121
|
+
out
|
114
122
|
end
|
115
123
|
# Estimated Variance-Covariance Matrix
|
116
124
|
# Used for calculation of se of constant
|
117
125
|
def estimated_variance_covariance_matrix
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
Math::sqrt(i) if i>0
|
128
|
-
}
|
126
|
+
mse_p=mse
|
127
|
+
columns=[]
|
128
|
+
@ds_valid.each_vector{|k,v|
|
129
|
+
columns.push(v.data) unless k==@y_var
|
130
|
+
}
|
131
|
+
columns.unshift([1.0]*@ds_valid.cases)
|
132
|
+
x=Matrix.columns(columns)
|
133
|
+
matrix=((x.t*x)).inverse * mse
|
134
|
+
matrix.collect {|i| Math::sqrt(i) if i>0 }
|
129
135
|
end
|
130
136
|
# T for constant
|
131
137
|
def constant_t
|
132
|
-
|
138
|
+
constant.to_f/constant_se
|
133
139
|
end
|
134
140
|
# Standard error for constant
|
135
141
|
def constant_se
|
136
|
-
|
137
|
-
end
|
138
|
-
# Retrieves a summary for Regression
|
139
|
-
def summary(report_type=ConsoleSummary)
|
140
|
-
c=coeffs
|
141
|
-
out=""
|
142
|
-
out.extend report_type
|
143
|
-
out.add <<-HEREDOC
|
144
|
-
Summary for regression of #{@fields.join(',')} over #{@y_var}
|
145
|
-
*************************************************************
|
146
|
-
Engine: #{self.class}
|
147
|
-
Cases(listwise)=#{@ds.cases}(#{@ds_valid.cases})
|
148
|
-
r=#{sprintf("%0.3f",r)}
|
149
|
-
r2=#{sprintf("%0.3f",r2)}
|
150
|
-
Equation=#{sprintf("%0.3f",constant)}+#{@fields.collect {|k| sprintf("%0.3f%s",c[k],k)}.join(' + ')}
|
151
|
-
HEREDOC
|
152
|
-
|
153
|
-
out.add_line
|
154
|
-
out.add "ANOVA TABLE"
|
155
|
-
|
156
|
-
t=Statsample::ReportTable.new(%w{source ss df ms f s})
|
157
|
-
t.add_row(["Regression", sprintf("%0.3f",ssr), df_r, sprintf("%0.3f",msr), sprintf("%0.3f",f), sprintf("%0.3f", significance)])
|
158
|
-
t.add_row(["Error", sprintf("%0.3f",sse), df_e, sprintf("%0.3f",mse)])
|
159
|
-
|
160
|
-
t.add_row(["Total", sprintf("%0.3f",sst), df_r+df_e])
|
161
|
-
|
162
|
-
out.parse_table(t)
|
163
|
-
|
164
|
-
begin
|
165
|
-
out.add "Beta coefficientes"
|
166
|
-
sc=standarized_coeffs
|
167
|
-
cse=coeffs_se
|
168
|
-
t=Statsample::ReportTable.new(%w{coeff b beta se t})
|
169
|
-
t.add_row(["Constant", sprintf("%0.3f", constant), "-", sprintf("%0.3f", constant_se), sprintf("%0.3f", constant_t)])
|
170
|
-
@fields.each{|f|
|
171
|
-
t.add_row([f, sprintf("%0.3f", c[f]), sprintf("%0.3f", sc[f]), sprintf("%0.3f", cse[f]), sprintf("%0.3f", c[f].quo(cse[f]))])
|
172
|
-
}
|
173
|
-
out.parse_table(t)
|
174
|
-
|
175
|
-
rescue
|
142
|
+
estimated_variance_covariance_matrix[0,0]
|
176
143
|
end
|
177
|
-
|
144
|
+
def summary
|
145
|
+
rp=ReportBuilder.new()
|
146
|
+
rp.add(self)
|
147
|
+
rp.to_text
|
178
148
|
end
|
149
|
+
def to_reportbuilder(generator)
|
150
|
+
anchor=generator.add_toc_entry(_("Multiple Regression: ")+@name)
|
151
|
+
generator.add_html "<div class='multiple-regression'>#{@name}<a name='#{anchor}'></a>"
|
152
|
+
c=coeffs
|
153
|
+
generator.add_text(_("Engine: %s") % self.class)
|
154
|
+
generator.add_text(_("Cases(listwise)=%d(%d)") % [@ds.cases, @ds_valid.cases])
|
155
|
+
generator.add_text("r=#{sprintf('%0.3f',r)}")
|
156
|
+
generator.add_text("r=#{sprintf('%0.3f',r2)}")
|
157
|
+
|
158
|
+
generator.add_text(_("Equation")+"="+ sprintf('%0.3f',constant) +" + "+ @fields.collect {|k| sprintf('%0.3f%s',c[k],k)}.join(' + ') )
|
159
|
+
|
160
|
+
t=ReportBuilder::Table.new(:name=>"ANOVA", :header=>%w{source ss df ms f s})
|
161
|
+
t.add_row([_("Regression"), sprintf("%0.3f",ssr), df_r, sprintf("%0.3f",msr), sprintf("%0.3f",f), sprintf("%0.3f", significance)])
|
162
|
+
t.add_row([_("Error"), sprintf("%0.3f",sse), df_e, sprintf("%0.3f",mse)])
|
163
|
+
|
164
|
+
t.add_row([_("Total"), sprintf("%0.3f",sst), df_r+df_e])
|
165
|
+
generator.parse_element(t)
|
166
|
+
sc=standarized_coeffs
|
167
|
+
cse=coeffs_se
|
168
|
+
t=ReportBuilder::Table.new(:name=>"Beta coefficients", :header=>%w{coeff b beta se t}.collect{|field| _(field)} )
|
169
|
+
|
170
|
+
t.add_row([_("Constant"), sprintf("%0.3f", constant), "-", sprintf("%0.3f", constant_se), sprintf("%0.3f", constant_t)])
|
171
|
+
@fields.each do |f|
|
172
|
+
t.add_row([f, sprintf("%0.3f", c[f]), sprintf("%0.3f", sc[f]), sprintf("%0.3f", cse[f]), sprintf("%0.3f", c[f].quo(cse[f]))])
|
173
|
+
end
|
174
|
+
generator.parse_element(t)
|
175
|
+
generator.add_html("</div>")
|
176
|
+
end
|
177
|
+
|
178
|
+
|
179
179
|
def assign_names(c)
|
180
180
|
a={}
|
181
181
|
@fields.each_index {|i|
|
182
|
-
|
182
|
+
a[@fields[i]]=c[i]
|
183
183
|
}
|
184
184
|
a
|
185
185
|
end
|
186
186
|
|
187
|
-
|
188
187
|
# Deprecated
|
189
188
|
# Sum of squares of error (manual calculation)
|
190
189
|
# using the predicted value minus the y_i value
|
@@ -193,10 +192,10 @@ HEREDOC
|
|
193
192
|
cases=0
|
194
193
|
sse=(0...@ds.cases).inject(0) {|a,i|
|
195
194
|
if !@dy.data_with_nils[i].nil? and !pr[i].nil?
|
196
|
-
|
197
|
-
|
195
|
+
cases+=1
|
196
|
+
a+((pr[i]-@dy[i])**2)
|
198
197
|
else
|
199
|
-
|
198
|
+
a
|
200
199
|
end
|
201
200
|
}
|
202
201
|
sse*(min_n_valid-1.0).quo(cases-1)
|
@@ -17,105 +17,102 @@ module Multiple
|
|
17
17
|
# lr=Statsample::Regression::Multiple::GslEngine.new(ds,'y')
|
18
18
|
#
|
19
19
|
class GslEngine < BaseEngine
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
end
|
34
|
-
j=0
|
35
|
-
@ds.fields.each{|f|
|
36
|
-
if f!=@y_var
|
37
|
-
@ds[f].each_index{|i1|
|
38
|
-
max_deps.set(i1,j,@ds[f][i1])
|
39
|
-
}
|
40
|
-
columns.push(@ds[f].to_a)
|
41
|
-
@fields.push(f)
|
42
|
-
j+=1
|
43
|
-
end
|
44
|
-
}
|
45
|
-
@dep_columns=columns.dup
|
46
|
-
@lr_s=nil
|
47
|
-
c, @cov, @chisq, @status = GSL::MultiFit.linear(max_deps, @dy.gsl)
|
48
|
-
@constant=c[constant_col]
|
49
|
-
@coeffs_a=c.to_a.slice(0...constant_col)
|
50
|
-
@coeffs=assign_names(@coeffs_a)
|
51
|
-
c=nil
|
52
|
-
end
|
53
|
-
|
54
|
-
def _dump(i)
|
55
|
-
Marshal.dump({'ds'=>@ds,'y_var'=>@y_var})
|
56
|
-
end
|
57
|
-
def self._load(data)
|
58
|
-
h=Marshal.load(data)
|
59
|
-
self.new(h['ds'], h['y_var'])
|
60
|
-
end
|
61
|
-
|
62
|
-
def coeffs
|
63
|
-
@coeffs
|
64
|
-
end
|
65
|
-
# Coefficients using a constant
|
66
|
-
# Based on http://www.xycoon.com/ols1.htm
|
67
|
-
def matrix_resolution
|
68
|
-
mse_p=mse
|
69
|
-
columns=@dep_columns.dup.map {|xi| xi.map{|i| i.to_f}}
|
70
|
-
columns.unshift([1.0]*@ds.cases)
|
71
|
-
y=Matrix.columns([@dy.data.map {|i| i.to_f}])
|
72
|
-
x=Matrix.columns(columns)
|
73
|
-
xt=x.t
|
74
|
-
matrix=((xt*x)).inverse*xt
|
75
|
-
matrix*y
|
76
|
-
end
|
77
|
-
def r2
|
78
|
-
r**2
|
20
|
+
def initialize(ds,y_var, opts=Hash.new)
|
21
|
+
super
|
22
|
+
@ds=ds.dup_only_valid
|
23
|
+
@ds_valid=@ds
|
24
|
+
@dy=@ds[@y_var]
|
25
|
+
@ds_indep=ds.dup(ds.fields-[y_var])
|
26
|
+
# Create a custom matrix
|
27
|
+
columns=[]
|
28
|
+
@fields=[]
|
29
|
+
max_deps = GSL::Matrix.alloc(@ds.cases, @ds.fields.size)
|
30
|
+
constant_col=@ds.fields.size-1
|
31
|
+
for i in 0...@ds.cases
|
32
|
+
max_deps.set(i,constant_col,1)
|
79
33
|
end
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
def standarized_coeffs
|
90
|
-
l=lr_s
|
91
|
-
l.coeffs
|
92
|
-
end
|
93
|
-
def lr_s
|
94
|
-
if @lr_s.nil?
|
95
|
-
build_standarized
|
34
|
+
j=0
|
35
|
+
@ds.fields.each{|f|
|
36
|
+
if f!=@y_var
|
37
|
+
@ds[f].each_index{|i1|
|
38
|
+
max_deps.set(i1,j,@ds[f][i1])
|
39
|
+
}
|
40
|
+
columns.push(@ds[f].to_a)
|
41
|
+
@fields.push(f)
|
42
|
+
j+=1
|
96
43
|
end
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
44
|
+
}
|
45
|
+
@dep_columns=columns.dup
|
46
|
+
@lr_s=nil
|
47
|
+
c, @cov, @chisq, @status = GSL::MultiFit.linear(max_deps, @dy.gsl)
|
48
|
+
@constant=c[constant_col]
|
49
|
+
@coeffs_a=c.to_a.slice(0...constant_col)
|
50
|
+
@coeffs=assign_names(@coeffs_a)
|
51
|
+
c=nil
|
52
|
+
end
|
53
|
+
|
54
|
+
def _dump(i)
|
55
|
+
Marshal.dump({'ds'=>@ds,'y_var'=>@y_var})
|
56
|
+
end
|
57
|
+
def self._load(data)
|
58
|
+
h=Marshal.load(data)
|
59
|
+
self.new(h['ds'], h['y_var'])
|
60
|
+
end
|
61
|
+
|
62
|
+
def coeffs
|
63
|
+
@coeffs
|
64
|
+
end
|
65
|
+
# Coefficients using a constant
|
66
|
+
# Based on http://www.xycoon.com/ols1.htm
|
67
|
+
def matrix_resolution
|
68
|
+
mse_p=mse
|
69
|
+
columns=@dep_columns.dup.map {|xi| xi.map{|i| i.to_f}}
|
70
|
+
columns.unshift([1.0]*@ds.cases)
|
71
|
+
y=Matrix.columns([@dy.data.map {|i| i.to_f}])
|
72
|
+
x=Matrix.columns(columns)
|
73
|
+
xt=x.t
|
74
|
+
matrix=((xt*x)).inverse*xt
|
75
|
+
matrix*y
|
76
|
+
end
|
77
|
+
def r2
|
78
|
+
r**2
|
79
|
+
end
|
80
|
+
def r
|
81
|
+
Bivariate::pearson(@dy, predicted)
|
82
|
+
end
|
83
|
+
def sst
|
84
|
+
@dy.ss
|
85
|
+
end
|
86
|
+
def constant
|
87
|
+
@constant
|
88
|
+
end
|
89
|
+
def standarized_coeffs
|
90
|
+
l=lr_s
|
91
|
+
l.coeffs
|
92
|
+
end
|
93
|
+
def lr_s
|
94
|
+
if @lr_s.nil?
|
95
|
+
build_standarized
|
113
96
|
end
|
97
|
+
@lr_s
|
98
|
+
end
|
99
|
+
def build_standarized
|
100
|
+
@ds_s=@ds.standarize
|
101
|
+
@lr_s=GslEngine.new(@ds_s,@y_var)
|
102
|
+
end
|
103
|
+
def process_s(v)
|
104
|
+
lr_s.process(v)
|
105
|
+
end
|
106
|
+
# ???? Not equal to SPSS output
|
107
|
+
def standarized_residuals
|
108
|
+
res=residuals
|
109
|
+
red_sd=residuals.sds
|
110
|
+
res.collect {|v|
|
111
|
+
v.quo(red_sd)
|
112
|
+
}.to_vector(:scale)
|
113
|
+
end
|
114
114
|
end
|
115
115
|
end
|
116
116
|
end
|
117
117
|
end # for Statsample
|
118
118
|
end # for if
|
119
|
-
|
120
|
-
|
121
|
-
|