statsample 0.18.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data.tar.gz.sig +0 -0
- data/History.txt +23 -0
- data/Manifest.txt +28 -17
- data/Rakefile +3 -2
- data/benchmarks/correlation_matrix_15_variables.rb +31 -0
- data/benchmarks/correlation_matrix_5_variables.rb +32 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +75 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
- data/benchmarks/correlation_matrix_methods/results.ds +0 -0
- data/benchmarks/factor_map.rb +37 -0
- data/benchmarks/helpers_benchmark.rb +5 -0
- data/examples/boxplot.rb +13 -14
- data/examples/correlation_matrix.rb +16 -8
- data/examples/dataset.rb +13 -4
- data/examples/dominance_analysis.rb +23 -17
- data/examples/dominance_analysis_bootstrap.rb +28 -22
- data/examples/histogram.rb +8 -9
- data/examples/icc.rb +20 -21
- data/examples/levene.rb +10 -4
- data/examples/multiple_regression.rb +9 -28
- data/examples/multivariate_correlation.rb +9 -3
- data/examples/parallel_analysis.rb +20 -16
- data/examples/polychoric.rb +15 -9
- data/examples/principal_axis.rb +18 -6
- data/examples/reliability.rb +26 -13
- data/examples/scatterplot.rb +10 -6
- data/examples/t_test.rb +15 -6
- data/examples/tetrachoric.rb +9 -2
- data/examples/u_test.rb +12 -4
- data/examples/vector.rb +13 -2
- data/examples/velicer_map_test.rb +33 -26
- data/lib/statsample.rb +32 -12
- data/lib/statsample/analysis.rb +79 -0
- data/lib/statsample/analysis/suite.rb +72 -0
- data/lib/statsample/analysis/suitereportbuilder.rb +38 -0
- data/lib/statsample/bivariate.rb +70 -16
- data/lib/statsample/dataset.rb +25 -19
- data/lib/statsample/dominanceanalysis.rb +2 -2
- data/lib/statsample/factor.rb +2 -0
- data/lib/statsample/factor/map.rb +16 -10
- data/lib/statsample/factor/parallelanalysis.rb +9 -3
- data/lib/statsample/factor/pca.rb +28 -32
- data/lib/statsample/factor/rotation.rb +15 -8
- data/lib/statsample/graph/boxplot.rb +3 -4
- data/lib/statsample/graph/histogram.rb +2 -1
- data/lib/statsample/graph/scatterplot.rb +1 -0
- data/lib/statsample/matrix.rb +106 -16
- data/lib/statsample/regression.rb +4 -1
- data/lib/statsample/regression/binomial.rb +1 -1
- data/lib/statsample/regression/multiple/baseengine.rb +19 -9
- data/lib/statsample/regression/multiple/gslengine.rb +127 -126
- data/lib/statsample/regression/multiple/matrixengine.rb +8 -5
- data/lib/statsample/regression/multiple/rubyengine.rb +1 -1
- data/lib/statsample/regression/simple.rb +31 -6
- data/lib/statsample/reliability.rb +11 -3
- data/lib/statsample/reliability/scaleanalysis.rb +4 -4
- data/lib/statsample/shorthand.rb +81 -0
- data/lib/statsample/test/chisquare.rb +1 -1
- data/lib/statsample/vector.rb +163 -163
- data/lib/statsample/vector/gsl.rb +106 -0
- data/references.txt +2 -2
- data/{data → test/fixtures}/crime.txt +0 -0
- data/{data → test/fixtures}/hartman_23.matrix +0 -0
- data/{data → test/fixtures}/repeated_fields.csv +0 -0
- data/{data → test/fixtures}/test_binomial.csv +0 -0
- data/test/{test_csv.csv → fixtures/test_csv.csv} +0 -0
- data/test/{test_xls.xls → fixtures/test_xls.xls} +0 -0
- data/{data → test/fixtures}/tetmat_matrix.txt +0 -0
- data/{data → test/fixtures}/tetmat_test.txt +0 -0
- data/test/helpers_tests.rb +18 -2
- data/test/test_analysis.rb +118 -0
- data/test/test_anovatwoway.rb +1 -1
- data/test/test_anovatwowaywithdataset.rb +1 -1
- data/test/test_anovawithvectors.rb +1 -2
- data/test/test_bartlettsphericity.rb +1 -2
- data/test/test_bivariate.rb +64 -22
- data/test/test_codification.rb +1 -2
- data/test/test_crosstab.rb +1 -2
- data/test/test_csv.rb +3 -4
- data/test/test_dataset.rb +24 -3
- data/test/test_dominance_analysis.rb +1 -2
- data/test/test_factor.rb +8 -69
- data/test/test_factor_map.rb +43 -0
- data/test/test_factor_pa.rb +54 -0
- data/test/test_ggobi.rb +1 -1
- data/test/test_gsl.rb +12 -18
- data/test/test_histogram.rb +1 -2
- data/test/test_logit.rb +62 -18
- data/test/test_matrix.rb +4 -5
- data/test/test_mle.rb +3 -4
- data/test/test_regression.rb +21 -2
- data/test/test_reliability.rb +3 -3
- data/test/test_reliability_icc.rb +1 -1
- data/test/test_reliability_skillscale.rb +20 -4
- data/test/test_resample.rb +1 -2
- data/test/test_rserve_extension.rb +1 -2
- data/test/test_srs.rb +1 -2
- data/test/test_statistics.rb +1 -2
- data/test/test_stest.rb +1 -2
- data/test/test_stratified.rb +1 -2
- data/test/test_test_f.rb +1 -2
- data/test/test_test_t.rb +1 -2
- data/test/test_umannwhitney.rb +1 -2
- data/test/test_vector.rb +117 -18
- data/test/test_xls.rb +2 -3
- data/web/Rakefile +39 -0
- metadata +109 -29
- metadata.gz.sig +0 -0
- data/examples/parallel_analysis_tetrachoric.rb +0 -31
- data/lib/distribution.rb +0 -25
- data/lib/distribution/chisquare.rb +0 -23
- data/lib/distribution/f.rb +0 -35
- data/lib/distribution/normal.rb +0 -60
- data/lib/distribution/normalbivariate.rb +0 -284
- data/lib/distribution/normalmultivariate.rb +0 -73
- data/lib/distribution/t.rb +0 -55
- data/test/test_distribution.rb +0 -73
@@ -27,7 +27,7 @@ module Factor
|
|
27
27
|
attr_accessor :max_iterations
|
28
28
|
# Maximum precision
|
29
29
|
attr_accessor :epsilon
|
30
|
-
|
30
|
+
attr_accessor :use_gsl
|
31
31
|
dirty_writer :max_iterations, :epsilon
|
32
32
|
dirty_memoize :iterations, :rotated, :component_transformation_matrix, :h2
|
33
33
|
|
@@ -41,6 +41,7 @@ module Factor
|
|
41
41
|
@epsilon=EPSILON
|
42
42
|
@rotated=nil
|
43
43
|
@h2=(@matrix.collect {|c| c**2} * Matrix.column_vector([1]*@m)).column(0).to_a
|
44
|
+
@use_gsl=Statsample.has_gsl?
|
44
45
|
opts.each{|k,v|
|
45
46
|
self.send("#{k}=",v) if self.respond_to? k
|
46
47
|
}
|
@@ -58,11 +59,12 @@ module Factor
|
|
58
59
|
end
|
59
60
|
# Start iteration
|
60
61
|
def iterate
|
61
|
-
|
62
|
-
|
63
|
-
|
62
|
+
k_matrix=@use_gsl ? GSL::Matrix : ::Matrix
|
63
|
+
t=k_matrix.identity(@m)
|
64
|
+
b=(@use_gsl ? @matrix.to_gsl : @matrix.dup)
|
65
|
+
h=k_matrix.diagonal(*@h2).collect {|c| Math::sqrt(c)}
|
64
66
|
h_inverse=h.collect {|c| c!=0 ? 1/c : 0 }
|
65
|
-
bh=h_inverse*b
|
67
|
+
bh=h_inverse * b
|
66
68
|
@not_converged=true
|
67
69
|
@iterations=0
|
68
70
|
while @not_converged
|
@@ -110,9 +112,14 @@ module Factor
|
|
110
112
|
t[row_i][i]=tx_rot[row_i]
|
111
113
|
t[row_i][j]=ty_rot[row_i]
|
112
114
|
}
|
113
|
-
|
114
|
-
|
115
|
-
|
115
|
+
#if @use_gsl
|
116
|
+
bh=k_matrix.[](*bh)
|
117
|
+
t=k_matrix.[](*t)
|
118
|
+
#else
|
119
|
+
# bh=Matrix.rows(bh)
|
120
|
+
# t=Matrix.rows(t)
|
121
|
+
|
122
|
+
#end
|
116
123
|
else
|
117
124
|
num_pairs=num_pairs-1
|
118
125
|
@not_converged=false if num_pairs==0
|
@@ -49,7 +49,6 @@ module Statsample
|
|
49
49
|
# to the anchor location. For example, with the default left alignment,
|
50
50
|
# an angle of Math::PI / 2 causes text to proceed downwards. The default angle is zero.
|
51
51
|
attr_accessor :label_angle
|
52
|
-
|
53
52
|
attr_reader :x_scale, :y_scale
|
54
53
|
# Create a new Boxplot.
|
55
54
|
# Parameters: Hash of options
|
@@ -223,11 +222,11 @@ module Statsample
|
|
223
222
|
dot.bottom {|v| y_scale.scale(v)}
|
224
223
|
dot.title {|v| v}
|
225
224
|
end
|
226
|
-
|
227
|
-
|
228
|
-
end
|
225
|
+
end
|
229
226
|
end
|
227
|
+
vis
|
230
228
|
end
|
229
|
+
|
231
230
|
# Returns SVG with boxplot
|
232
231
|
def to_svg
|
233
232
|
rp=rubyvis_panel
|
@@ -120,7 +120,7 @@ module Statsample
|
|
120
120
|
y_scale=Rubyvis::Scale.linear(@minimum_y, @maximum_y).range(0, height - margin_vert)
|
121
121
|
|
122
122
|
y_scale.nice
|
123
|
-
|
123
|
+
|
124
124
|
bins=@hist.bins.times.map {|i|
|
125
125
|
{
|
126
126
|
:low =>@hist.get_range(i)[0],
|
@@ -170,6 +170,7 @@ module Statsample
|
|
170
170
|
end
|
171
171
|
rubyvis_normal_distribution(pan) if @line_normal_distribution
|
172
172
|
end
|
173
|
+
vis
|
173
174
|
end
|
174
175
|
# Returns SVG with histogram
|
175
176
|
def to_svg
|
data/lib/statsample/matrix.rb
CHANGED
@@ -2,6 +2,9 @@ class ::Vector
|
|
2
2
|
def to_matrix
|
3
3
|
::Matrix.columns([self.to_a])
|
4
4
|
end
|
5
|
+
def to_vector
|
6
|
+
self
|
7
|
+
end
|
5
8
|
end
|
6
9
|
class ::Matrix
|
7
10
|
def to_matrix
|
@@ -28,18 +31,24 @@ class ::Matrix
|
|
28
31
|
if Statsample.has_gsl?
|
29
32
|
# Optimize eigenpairs of extendmatrix module using gsl
|
30
33
|
def eigenpairs
|
31
|
-
|
32
|
-
ep=eigval.size.times.map {|i|
|
33
|
-
[eigval[i], eigvec.get_col(i).to_a]
|
34
|
-
}
|
35
|
-
ep.sort{|a,b| a[0]<=>b[0]}.reverse
|
34
|
+
to_gsl.eigenpairs
|
36
35
|
end
|
37
36
|
end
|
38
37
|
|
39
38
|
def eigenvalues
|
40
|
-
|
39
|
+
eigenpairs.collect {|v| v[0]}
|
40
|
+
end
|
41
|
+
def eigenvectors
|
42
|
+
eigenpairs.collect {|v| v[1]}
|
43
|
+
end
|
44
|
+
def eigenvectors_matrix
|
45
|
+
Matrix.columns(eigenvectors)
|
41
46
|
end
|
47
|
+
|
48
|
+
|
42
49
|
|
50
|
+
|
51
|
+
|
43
52
|
def to_gsl
|
44
53
|
out=[]
|
45
54
|
self.row_size.times{|i|
|
@@ -55,18 +64,94 @@ module GSL
|
|
55
64
|
def to_matrix
|
56
65
|
::Matrix.columns([self.size.times.map {|i| self[i]}])
|
57
66
|
end
|
67
|
+
def to_ary
|
68
|
+
to_a
|
69
|
+
end
|
70
|
+
def to_gsl
|
71
|
+
self
|
72
|
+
end
|
58
73
|
end
|
59
74
|
end
|
60
75
|
class Matrix
|
61
76
|
def to_gsl
|
62
77
|
self
|
63
78
|
end
|
79
|
+
|
80
|
+
def to_dataset
|
81
|
+
f = (self.respond_to? :fields_y) ? fields_y : column_size.times.map {|i| _("VAR_%d") % (i+1) }
|
82
|
+
ds=Statsample::Dataset.new(f)
|
83
|
+
f.each do |ff|
|
84
|
+
ds[ff].type=:scale
|
85
|
+
ds[ff].name=ff
|
86
|
+
end
|
87
|
+
row_size.times {|i|
|
88
|
+
ds.add_case_array(self.row(i).to_a)
|
89
|
+
}
|
90
|
+
ds.update_valid_data
|
91
|
+
ds.name=self.name if self.respond_to? :name
|
92
|
+
ds
|
93
|
+
end
|
94
|
+
|
95
|
+
def row_size
|
96
|
+
size1
|
97
|
+
end
|
98
|
+
def column_size
|
99
|
+
size2
|
100
|
+
end
|
101
|
+
def determinant
|
102
|
+
det
|
103
|
+
end
|
104
|
+
def inverse
|
105
|
+
GSL::Linalg::LU.invert(self)
|
106
|
+
end
|
107
|
+
def eigenvalues
|
108
|
+
eigenpairs.collect {|v| v[0]}
|
109
|
+
end
|
110
|
+
def eigenvectors
|
111
|
+
eigenpairs.collect {|v| v[1]}
|
112
|
+
end
|
113
|
+
|
114
|
+
# Matrix sum of squares
|
115
|
+
def mssq
|
116
|
+
sum=0
|
117
|
+
to_v.each {|i| sum+=i**2}
|
118
|
+
sum
|
119
|
+
end
|
120
|
+
|
121
|
+
def eigenvectors_matrix
|
122
|
+
eigval, eigvec= GSL::Eigen.symmv(self)
|
123
|
+
GSL::Eigen::symmv_sort(eigval, eigvec, GSL::Eigen::SORT_VAL_DESC)
|
124
|
+
eigvec
|
125
|
+
end
|
126
|
+
def eigenpairs
|
127
|
+
eigval, eigvec= GSL::Eigen.symmv(self)
|
128
|
+
GSL::Eigen::symmv_sort(eigval, eigvec, GSL::Eigen::SORT_VAL_DESC)
|
129
|
+
@eigenpairs=eigval.size.times.map {|i|
|
130
|
+
[eigval[i],eigvec.get_col(i)]
|
131
|
+
}
|
132
|
+
end
|
133
|
+
|
134
|
+
#def eigenpairs_ruby
|
135
|
+
# self.to_matrix.eigenpairs_ruby
|
136
|
+
#end
|
137
|
+
def square?
|
138
|
+
size1==size2
|
139
|
+
end
|
64
140
|
def to_matrix
|
65
141
|
rows=self.size1
|
66
142
|
cols=self.size2
|
67
143
|
out=(0...rows).collect{|i| (0...cols).collect {|j| self[i,j]} }
|
68
144
|
::Matrix.rows(out)
|
69
145
|
end
|
146
|
+
def total_sum
|
147
|
+
sum=0
|
148
|
+
size1.times {|i|
|
149
|
+
size2.times {|j|
|
150
|
+
sum+=self[i,j]
|
151
|
+
}
|
152
|
+
}
|
153
|
+
sum
|
154
|
+
end
|
70
155
|
end
|
71
156
|
end
|
72
157
|
|
@@ -122,7 +207,7 @@ module Statsample
|
|
122
207
|
@@covariatematrix=0
|
123
208
|
|
124
209
|
# Get type of covariate matrix. Could be :covariance or :correlation
|
125
|
-
def
|
210
|
+
def _type
|
126
211
|
if row_size==column_size
|
127
212
|
if row_size.times.find {|i| self[i,i]!=1.0}
|
128
213
|
:covariance
|
@@ -134,11 +219,11 @@ module Statsample
|
|
134
219
|
end
|
135
220
|
|
136
221
|
end
|
137
|
-
def
|
222
|
+
def _type=(t)
|
138
223
|
@type=t
|
139
224
|
end
|
140
225
|
def correlation
|
141
|
-
if(
|
226
|
+
if(_type==:covariance)
|
142
227
|
matrix=Matrix.rows(row_size.times.collect { |i|
|
143
228
|
column_size.times.collect { |j|
|
144
229
|
if i==j
|
@@ -151,7 +236,7 @@ module Statsample
|
|
151
236
|
matrix.extend CovariateMatrix
|
152
237
|
matrix.fields_x=fields_x
|
153
238
|
matrix.fields_y=fields_y
|
154
|
-
matrix.
|
239
|
+
matrix._type=:correlation
|
155
240
|
matrix
|
156
241
|
else
|
157
242
|
self
|
@@ -192,12 +277,17 @@ module Statsample
|
|
192
277
|
columns||=rows
|
193
278
|
# Convert all labels on index
|
194
279
|
row_index=rows.collect {|v|
|
195
|
-
v.is_a?(Numeric) ? v : fields_x.index(v)
|
280
|
+
r=v.is_a?(Numeric) ? v : fields_x.index(v)
|
281
|
+
raise "Index #{v} doesn't exists on matrix" if r.nil?
|
282
|
+
r
|
196
283
|
}
|
197
284
|
column_index=columns.collect {|v|
|
198
|
-
v.is_a?(Numeric) ? v : fields_y.index(v)
|
285
|
+
r=v.is_a?(Numeric) ? v : fields_y.index(v)
|
286
|
+
raise "Index #{v} doesn't exists on matrix" if r.nil?
|
287
|
+
r
|
199
288
|
}
|
200
|
-
|
289
|
+
|
290
|
+
|
201
291
|
fx=row_index.collect {|v| fields_x[v]}
|
202
292
|
fy=column_index.collect {|v| fields_y[v]}
|
203
293
|
|
@@ -206,14 +296,14 @@ module Statsample
|
|
206
296
|
matrix.extend CovariateMatrix
|
207
297
|
matrix.fields_x=fx
|
208
298
|
matrix.fields_y=fy
|
209
|
-
matrix.
|
299
|
+
matrix._type=_type
|
210
300
|
matrix
|
211
301
|
end
|
212
302
|
def report_building(generator)
|
213
|
-
@name||= (
|
303
|
+
@name||= (_type==:correlation ? _("Correlation"):_("Covariance"))+_(" Matrix")
|
214
304
|
generator.table(:name=>@name, :header=>[""]+fields_y) do |t|
|
215
305
|
row_size.times {|i|
|
216
|
-
t.row([fields_x[i]]
|
306
|
+
t.row([fields_x[i]]+row(i).to_a.collect {|i1|
|
217
307
|
i1.nil? ? "--" : sprintf("%0.3f",i1).gsub("0.",".")
|
218
308
|
})
|
219
309
|
}
|
@@ -22,6 +22,9 @@ module Statsample
|
|
22
22
|
# * Logit Regression: Statsample::Regression::Binomial::Logit
|
23
23
|
# * Probit Regression: Statsample::Regression::Binomial::Probit
|
24
24
|
module Regression
|
25
|
+
|
26
|
+
LinearDependency=Class.new(Exception)
|
27
|
+
|
25
28
|
# Create a Statsample::Regression::Simple object, for simple regression
|
26
29
|
# * x: independent Vector
|
27
30
|
# * y: dependent Vector
|
@@ -78,7 +81,7 @@ module Statsample
|
|
78
81
|
if missing_data==:pairwise
|
79
82
|
Statsample::Regression::Multiple::RubyEngine.new(ds,y_var, opts)
|
80
83
|
else
|
81
|
-
if Statsample.has_gsl?
|
84
|
+
if Statsample.has_gsl? and false
|
82
85
|
Statsample::Regression::Multiple::GslEngine.new(ds, y_var, opts)
|
83
86
|
else
|
84
87
|
ds2=ds.dup_only_valid
|
@@ -12,6 +12,8 @@ module Statsample
|
|
12
12
|
attr_reader :valid_cases
|
13
13
|
# Number of total cases (dataset.cases)
|
14
14
|
attr_reader :total_cases
|
15
|
+
|
16
|
+
attr_accessor :digits
|
15
17
|
def self.univariate?
|
16
18
|
true
|
17
19
|
end
|
@@ -23,9 +25,15 @@ module Statsample
|
|
23
25
|
@y_var=y_var
|
24
26
|
@r2=nil
|
25
27
|
@name=_("Multiple Regression: %s over %s") % [ ds.fields.join(",") , @y_var]
|
26
|
-
|
28
|
+
|
29
|
+
|
30
|
+
opts_default={:digits=>3}
|
31
|
+
@opts=opts_default.merge opts
|
32
|
+
|
33
|
+
@opts.each{|k,v|
|
27
34
|
self.send("#{k}=",v) if self.respond_to? k
|
28
35
|
}
|
36
|
+
|
29
37
|
end
|
30
38
|
# Calculate F Test
|
31
39
|
def anova
|
@@ -159,7 +167,7 @@ module Statsample
|
|
159
167
|
columns.unshift([1.0]*@valid_cases)
|
160
168
|
x=Matrix.columns(columns)
|
161
169
|
matrix=((x.t*x)).inverse * mse
|
162
|
-
matrix.collect {|i| Math::sqrt(i) if i
|
170
|
+
matrix.collect {|i| Math::sqrt(i) if i>=0 }
|
163
171
|
end
|
164
172
|
# T for constant
|
165
173
|
def constant_t
|
@@ -170,24 +178,26 @@ module Statsample
|
|
170
178
|
estimated_variance_covariance_matrix[0,0]
|
171
179
|
end
|
172
180
|
def report_building(b)
|
181
|
+
di="%0.#{digits}f"
|
173
182
|
b.section(:name=>@name) do |g|
|
174
183
|
c=coeffs
|
175
184
|
g.text _("Engine: %s") % self.class
|
176
185
|
g.text(_("Cases(listwise)=%d(%d)") % [@total_cases, @valid_cases])
|
177
|
-
g.text _("R
|
178
|
-
g.text _("R^2
|
179
|
-
g.text _"R^2 Adj
|
180
|
-
g.text _("Std.Error R
|
186
|
+
g.text _("R=#{di}") % r
|
187
|
+
g.text _("R^2=#{di}") % r2
|
188
|
+
g.text _"R^2 Adj=#{di}" % r2_adjusted
|
189
|
+
g.text _("Std.Error R=#{di}") % se_estimate
|
181
190
|
|
182
|
-
g.text(_("Equation")+"="+ sprintf(
|
191
|
+
g.text(_("Equation")+"="+ sprintf(di,constant) +" + "+ @fields.collect {|k| sprintf("#{di}%s",c[k],k)}.join(' + ') )
|
183
192
|
|
184
193
|
g.parse_element(anova)
|
185
194
|
sc=standarized_coeffs
|
195
|
+
|
186
196
|
cse=coeffs_se
|
187
197
|
g.table(:name=>_("Beta coefficients"), :header=>%w{coeff b beta se t}.collect{|field| _(field)} ) do |t|
|
188
|
-
t.row([_("Constant"), sprintf(
|
198
|
+
t.row([_("Constant"), sprintf(di, constant), "-", constant_se.nil? ? "": sprintf(di, constant_se), constant_t.nil? ? "" : sprintf(di, constant_t)])
|
189
199
|
@fields.each do |f|
|
190
|
-
t.row([f, sprintf(
|
200
|
+
t.row([f, sprintf(di, c[f]), sprintf(di, sc[f]), sprintf(di, cse[f]), sprintf(di, c[f].quo(cse[f]))])
|
191
201
|
end
|
192
202
|
end
|
193
203
|
end
|
@@ -1,131 +1,132 @@
|
|
1
1
|
if Statsample.has_gsl?
|
2
|
-
module Statsample
|
3
|
-
module Regression
|
4
|
-
module Multiple
|
5
|
-
# Class for Multiple Regression Analysis
|
6
|
-
# Requires rbgsl and uses a listwise approach.
|
7
|
-
# Slower on prediction of values than Alglib, because predict is ruby based.
|
8
|
-
# Better memory management on multiple (+1000) series of regression.
|
9
|
-
# If you need pairwise, use RubyEngine
|
10
|
-
# Example:
|
11
|
-
#
|
12
|
-
# @a=[1,3,2,4,3,5,4,6,5,7].to_vector(:scale)
|
13
|
-
# @b=[3,3,4,4,5,5,6,6,4,4].to_vector(:scale)
|
14
|
-
# @c=[11,22,30,40,50,65,78,79,99,100].to_vector(:scale)
|
15
|
-
# @y=[3,4,5,6,7,8,9,10,20,30].to_vector(:scale)
|
16
|
-
# ds={'a'=>@a,'b'=>@b,'c'=>@c,'y'=>@y}.to_dataset
|
17
|
-
# lr=Statsample::Regression::Multiple::GslEngine.new(ds,'y')
|
18
|
-
#
|
19
|
-
class GslEngine < BaseEngine
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
2
|
+
module Statsample
|
3
|
+
module Regression
|
4
|
+
module Multiple
|
5
|
+
# Class for Multiple Regression Analysis
|
6
|
+
# Requires rbgsl and uses a listwise approach.
|
7
|
+
# Slower on prediction of values than Alglib, because predict is ruby based.
|
8
|
+
# Better memory management on multiple (+1000) series of regression.
|
9
|
+
# If you need pairwise, use RubyEngine
|
10
|
+
# Example:
|
11
|
+
#
|
12
|
+
# @a=[1,3,2,4,3,5,4,6,5,7].to_vector(:scale)
|
13
|
+
# @b=[3,3,4,4,5,5,6,6,4,4].to_vector(:scale)
|
14
|
+
# @c=[11,22,30,40,50,65,78,79,99,100].to_vector(:scale)
|
15
|
+
# @y=[3,4,5,6,7,8,9,10,20,30].to_vector(:scale)
|
16
|
+
# ds={'a'=>@a,'b'=>@b,'c'=>@c,'y'=>@y}.to_dataset
|
17
|
+
# lr=Statsample::Regression::Multiple::GslEngine.new(ds,'y')
|
18
|
+
#
|
19
|
+
class GslEngine < BaseEngine
|
20
|
+
def initialize(ds,y_var, opts=Hash.new)
|
21
|
+
super
|
22
|
+
@ds=ds.dup_only_valid
|
23
|
+
@ds_valid=@ds
|
24
|
+
@valid_cases=@ds_valid.cases
|
25
|
+
@dy=@ds[@y_var]
|
26
|
+
@ds_indep=ds.dup(ds.fields-[y_var])
|
27
|
+
# Create a custom matrix
|
28
|
+
columns=[]
|
29
|
+
@fields=[]
|
30
|
+
max_deps = GSL::Matrix.alloc(@ds.cases, @ds.fields.size)
|
31
|
+
constant_col=@ds.fields.size-1
|
32
|
+
for i in 0...@ds.cases
|
33
|
+
max_deps.set(i,constant_col,1)
|
34
|
+
end
|
35
|
+
j=0
|
36
|
+
@ds.fields.each{|f|
|
37
|
+
if f!=@y_var
|
38
|
+
@ds[f].each_index{|i1|
|
39
|
+
max_deps.set(i1,j,@ds[f][i1])
|
40
|
+
}
|
41
|
+
columns.push(@ds[f].to_a)
|
42
|
+
@fields.push(f)
|
43
|
+
j+=1
|
44
|
+
end
|
40
45
|
}
|
41
|
-
columns.
|
42
|
-
@
|
43
|
-
|
46
|
+
@dep_columns=columns.dup
|
47
|
+
@lr_s=nil
|
48
|
+
c, @cov, @chisq, @status = GSL::MultiFit.linear(max_deps, @dy.gsl)
|
49
|
+
@constant=c[constant_col]
|
50
|
+
@coeffs_a=c.to_a.slice(0...constant_col)
|
51
|
+
@coeffs=assign_names(@coeffs_a)
|
52
|
+
c=nil
|
53
|
+
end
|
54
|
+
|
55
|
+
def _dump(i)
|
56
|
+
Marshal.dump({'ds'=>@ds,'y_var'=>@y_var})
|
57
|
+
end
|
58
|
+
def self._load(data)
|
59
|
+
h=Marshal.load(data)
|
60
|
+
self.new(h['ds'], h['y_var'])
|
61
|
+
end
|
62
|
+
|
63
|
+
def coeffs
|
64
|
+
@coeffs
|
65
|
+
end
|
66
|
+
# Coefficients using a constant
|
67
|
+
# Based on http://www.xycoon.com/ols1.htm
|
68
|
+
def matrix_resolution
|
69
|
+
columns=@dep_columns.dup.map {|xi| xi.map{|i| i.to_f}}
|
70
|
+
columns.unshift([1.0]*@ds.cases)
|
71
|
+
y=Matrix.columns([@dy.data.map {|i| i.to_f}])
|
72
|
+
x=Matrix.columns(columns)
|
73
|
+
xt=x.t
|
74
|
+
matrix=((xt*x)).inverse*xt
|
75
|
+
matrix*y
|
76
|
+
end
|
77
|
+
def r2
|
78
|
+
r**2
|
79
|
+
end
|
80
|
+
def r
|
81
|
+
Bivariate::pearson(@dy, predicted)
|
82
|
+
end
|
83
|
+
def sst
|
84
|
+
@dy.ss
|
85
|
+
end
|
86
|
+
def constant
|
87
|
+
@constant
|
88
|
+
end
|
89
|
+
def standarized_coeffs
|
90
|
+
l=lr_s
|
91
|
+
l.coeffs
|
92
|
+
end
|
93
|
+
def lr_s
|
94
|
+
if @lr_s.nil?
|
95
|
+
build_standarized
|
96
|
+
end
|
97
|
+
@lr_s
|
98
|
+
end
|
99
|
+
def build_standarized
|
100
|
+
@ds_s=@ds.standarize
|
101
|
+
@lr_s=GslEngine.new(@ds_s,@y_var)
|
102
|
+
end
|
103
|
+
def process_s(v)
|
104
|
+
lr_s.process(v)
|
105
|
+
end
|
106
|
+
# ???? Not equal to SPSS output
|
107
|
+
def standarized_residuals
|
108
|
+
res=residuals
|
109
|
+
red_sd=residuals.sds
|
110
|
+
res.collect {|v|
|
111
|
+
v.quo(red_sd)
|
112
|
+
}.to_vector(:scale)
|
113
|
+
end
|
114
|
+
|
115
|
+
# Standard error for coeffs
|
116
|
+
def coeffs_se
|
117
|
+
out={}
|
118
|
+
evcm=estimated_variance_covariance_matrix
|
119
|
+
@ds_valid.fields.each_with_index do |f,i|
|
120
|
+
|
121
|
+
mi=i+1
|
122
|
+
next if f==@y_var
|
123
|
+
out[f]=evcm[mi,mi]
|
124
|
+
end
|
125
|
+
out
|
126
|
+
end
|
127
|
+
|
44
128
|
end
|
45
|
-
|
46
|
-
@dep_columns=columns.dup
|
47
|
-
@lr_s=nil
|
48
|
-
c, @cov, @chisq, @status = GSL::MultiFit.linear(max_deps, @dy.gsl)
|
49
|
-
@constant=c[constant_col]
|
50
|
-
@coeffs_a=c.to_a.slice(0...constant_col)
|
51
|
-
@coeffs=assign_names(@coeffs_a)
|
52
|
-
c=nil
|
53
|
-
end
|
54
|
-
|
55
|
-
def _dump(i)
|
56
|
-
Marshal.dump({'ds'=>@ds,'y_var'=>@y_var})
|
57
|
-
end
|
58
|
-
def self._load(data)
|
59
|
-
h=Marshal.load(data)
|
60
|
-
self.new(h['ds'], h['y_var'])
|
61
|
-
end
|
62
|
-
|
63
|
-
def coeffs
|
64
|
-
@coeffs
|
65
|
-
end
|
66
|
-
# Coefficients using a constant
|
67
|
-
# Based on http://www.xycoon.com/ols1.htm
|
68
|
-
def matrix_resolution
|
69
|
-
columns=@dep_columns.dup.map {|xi| xi.map{|i| i.to_f}}
|
70
|
-
columns.unshift([1.0]*@ds.cases)
|
71
|
-
y=Matrix.columns([@dy.data.map {|i| i.to_f}])
|
72
|
-
x=Matrix.columns(columns)
|
73
|
-
xt=x.t
|
74
|
-
matrix=((xt*x)).inverse*xt
|
75
|
-
matrix*y
|
76
|
-
end
|
77
|
-
def r2
|
78
|
-
r**2
|
79
|
-
end
|
80
|
-
def r
|
81
|
-
Bivariate::pearson(@dy, predicted)
|
82
|
-
end
|
83
|
-
def sst
|
84
|
-
@dy.ss
|
85
|
-
end
|
86
|
-
def constant
|
87
|
-
@constant
|
88
|
-
end
|
89
|
-
def standarized_coeffs
|
90
|
-
l=lr_s
|
91
|
-
l.coeffs
|
92
|
-
end
|
93
|
-
def lr_s
|
94
|
-
if @lr_s.nil?
|
95
|
-
build_standarized
|
96
|
-
end
|
97
|
-
@lr_s
|
98
|
-
end
|
99
|
-
def build_standarized
|
100
|
-
@ds_s=@ds.standarize
|
101
|
-
@lr_s=GslEngine.new(@ds_s,@y_var)
|
102
|
-
end
|
103
|
-
def process_s(v)
|
104
|
-
lr_s.process(v)
|
105
|
-
end
|
106
|
-
# ???? Not equal to SPSS output
|
107
|
-
def standarized_residuals
|
108
|
-
res=residuals
|
109
|
-
red_sd=residuals.sds
|
110
|
-
res.collect {|v|
|
111
|
-
v.quo(red_sd)
|
112
|
-
}.to_vector(:scale)
|
113
|
-
end
|
114
|
-
|
115
|
-
# Standard error for coeffs
|
116
|
-
def coeffs_se
|
117
|
-
out={}
|
118
|
-
evcm=estimated_variance_covariance_matrix
|
119
|
-
@ds_valid.fields.each_with_index do |f,i|
|
120
|
-
mi=i+1
|
121
|
-
next if f==@y_var
|
122
|
-
out[f]=evcm[mi,mi]
|
129
|
+
end
|
123
130
|
end
|
124
|
-
|
125
|
-
end
|
126
|
-
|
127
|
-
end
|
128
|
-
end
|
129
|
-
end
|
130
|
-
end # for Statsample
|
131
|
+
end # for Statsample
|
131
132
|
end # for if
|