statsample 0.18.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data.tar.gz.sig +0 -0
- data/History.txt +23 -0
- data/Manifest.txt +28 -17
- data/Rakefile +3 -2
- data/benchmarks/correlation_matrix_15_variables.rb +31 -0
- data/benchmarks/correlation_matrix_5_variables.rb +32 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +75 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
- data/benchmarks/correlation_matrix_methods/results.ds +0 -0
- data/benchmarks/factor_map.rb +37 -0
- data/benchmarks/helpers_benchmark.rb +5 -0
- data/examples/boxplot.rb +13 -14
- data/examples/correlation_matrix.rb +16 -8
- data/examples/dataset.rb +13 -4
- data/examples/dominance_analysis.rb +23 -17
- data/examples/dominance_analysis_bootstrap.rb +28 -22
- data/examples/histogram.rb +8 -9
- data/examples/icc.rb +20 -21
- data/examples/levene.rb +10 -4
- data/examples/multiple_regression.rb +9 -28
- data/examples/multivariate_correlation.rb +9 -3
- data/examples/parallel_analysis.rb +20 -16
- data/examples/polychoric.rb +15 -9
- data/examples/principal_axis.rb +18 -6
- data/examples/reliability.rb +26 -13
- data/examples/scatterplot.rb +10 -6
- data/examples/t_test.rb +15 -6
- data/examples/tetrachoric.rb +9 -2
- data/examples/u_test.rb +12 -4
- data/examples/vector.rb +13 -2
- data/examples/velicer_map_test.rb +33 -26
- data/lib/statsample.rb +32 -12
- data/lib/statsample/analysis.rb +79 -0
- data/lib/statsample/analysis/suite.rb +72 -0
- data/lib/statsample/analysis/suitereportbuilder.rb +38 -0
- data/lib/statsample/bivariate.rb +70 -16
- data/lib/statsample/dataset.rb +25 -19
- data/lib/statsample/dominanceanalysis.rb +2 -2
- data/lib/statsample/factor.rb +2 -0
- data/lib/statsample/factor/map.rb +16 -10
- data/lib/statsample/factor/parallelanalysis.rb +9 -3
- data/lib/statsample/factor/pca.rb +28 -32
- data/lib/statsample/factor/rotation.rb +15 -8
- data/lib/statsample/graph/boxplot.rb +3 -4
- data/lib/statsample/graph/histogram.rb +2 -1
- data/lib/statsample/graph/scatterplot.rb +1 -0
- data/lib/statsample/matrix.rb +106 -16
- data/lib/statsample/regression.rb +4 -1
- data/lib/statsample/regression/binomial.rb +1 -1
- data/lib/statsample/regression/multiple/baseengine.rb +19 -9
- data/lib/statsample/regression/multiple/gslengine.rb +127 -126
- data/lib/statsample/regression/multiple/matrixengine.rb +8 -5
- data/lib/statsample/regression/multiple/rubyengine.rb +1 -1
- data/lib/statsample/regression/simple.rb +31 -6
- data/lib/statsample/reliability.rb +11 -3
- data/lib/statsample/reliability/scaleanalysis.rb +4 -4
- data/lib/statsample/shorthand.rb +81 -0
- data/lib/statsample/test/chisquare.rb +1 -1
- data/lib/statsample/vector.rb +163 -163
- data/lib/statsample/vector/gsl.rb +106 -0
- data/references.txt +2 -2
- data/{data → test/fixtures}/crime.txt +0 -0
- data/{data → test/fixtures}/hartman_23.matrix +0 -0
- data/{data → test/fixtures}/repeated_fields.csv +0 -0
- data/{data → test/fixtures}/test_binomial.csv +0 -0
- data/test/{test_csv.csv → fixtures/test_csv.csv} +0 -0
- data/test/{test_xls.xls → fixtures/test_xls.xls} +0 -0
- data/{data → test/fixtures}/tetmat_matrix.txt +0 -0
- data/{data → test/fixtures}/tetmat_test.txt +0 -0
- data/test/helpers_tests.rb +18 -2
- data/test/test_analysis.rb +118 -0
- data/test/test_anovatwoway.rb +1 -1
- data/test/test_anovatwowaywithdataset.rb +1 -1
- data/test/test_anovawithvectors.rb +1 -2
- data/test/test_bartlettsphericity.rb +1 -2
- data/test/test_bivariate.rb +64 -22
- data/test/test_codification.rb +1 -2
- data/test/test_crosstab.rb +1 -2
- data/test/test_csv.rb +3 -4
- data/test/test_dataset.rb +24 -3
- data/test/test_dominance_analysis.rb +1 -2
- data/test/test_factor.rb +8 -69
- data/test/test_factor_map.rb +43 -0
- data/test/test_factor_pa.rb +54 -0
- data/test/test_ggobi.rb +1 -1
- data/test/test_gsl.rb +12 -18
- data/test/test_histogram.rb +1 -2
- data/test/test_logit.rb +62 -18
- data/test/test_matrix.rb +4 -5
- data/test/test_mle.rb +3 -4
- data/test/test_regression.rb +21 -2
- data/test/test_reliability.rb +3 -3
- data/test/test_reliability_icc.rb +1 -1
- data/test/test_reliability_skillscale.rb +20 -4
- data/test/test_resample.rb +1 -2
- data/test/test_rserve_extension.rb +1 -2
- data/test/test_srs.rb +1 -2
- data/test/test_statistics.rb +1 -2
- data/test/test_stest.rb +1 -2
- data/test/test_stratified.rb +1 -2
- data/test/test_test_f.rb +1 -2
- data/test/test_test_t.rb +1 -2
- data/test/test_umannwhitney.rb +1 -2
- data/test/test_vector.rb +117 -18
- data/test/test_xls.rb +2 -3
- data/web/Rakefile +39 -0
- metadata +109 -29
- metadata.gz.sig +0 -0
- data/examples/parallel_analysis_tetrachoric.rb +0 -31
- data/lib/distribution.rb +0 -25
- data/lib/distribution/chisquare.rb +0 -23
- data/lib/distribution/f.rb +0 -35
- data/lib/distribution/normal.rb +0 -60
- data/lib/distribution/normalbivariate.rb +0 -284
- data/lib/distribution/normalmultivariate.rb +0 -73
- data/lib/distribution/t.rb +0 -55
- data/test/test_distribution.rb +0 -73
|
@@ -27,7 +27,7 @@ module Factor
|
|
|
27
27
|
attr_accessor :max_iterations
|
|
28
28
|
# Maximum precision
|
|
29
29
|
attr_accessor :epsilon
|
|
30
|
-
|
|
30
|
+
attr_accessor :use_gsl
|
|
31
31
|
dirty_writer :max_iterations, :epsilon
|
|
32
32
|
dirty_memoize :iterations, :rotated, :component_transformation_matrix, :h2
|
|
33
33
|
|
|
@@ -41,6 +41,7 @@ module Factor
|
|
|
41
41
|
@epsilon=EPSILON
|
|
42
42
|
@rotated=nil
|
|
43
43
|
@h2=(@matrix.collect {|c| c**2} * Matrix.column_vector([1]*@m)).column(0).to_a
|
|
44
|
+
@use_gsl=Statsample.has_gsl?
|
|
44
45
|
opts.each{|k,v|
|
|
45
46
|
self.send("#{k}=",v) if self.respond_to? k
|
|
46
47
|
}
|
|
@@ -58,11 +59,12 @@ module Factor
|
|
|
58
59
|
end
|
|
59
60
|
# Start iteration
|
|
60
61
|
def iterate
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
62
|
+
k_matrix=@use_gsl ? GSL::Matrix : ::Matrix
|
|
63
|
+
t=k_matrix.identity(@m)
|
|
64
|
+
b=(@use_gsl ? @matrix.to_gsl : @matrix.dup)
|
|
65
|
+
h=k_matrix.diagonal(*@h2).collect {|c| Math::sqrt(c)}
|
|
64
66
|
h_inverse=h.collect {|c| c!=0 ? 1/c : 0 }
|
|
65
|
-
bh=h_inverse*b
|
|
67
|
+
bh=h_inverse * b
|
|
66
68
|
@not_converged=true
|
|
67
69
|
@iterations=0
|
|
68
70
|
while @not_converged
|
|
@@ -110,9 +112,14 @@ module Factor
|
|
|
110
112
|
t[row_i][i]=tx_rot[row_i]
|
|
111
113
|
t[row_i][j]=ty_rot[row_i]
|
|
112
114
|
}
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
115
|
+
#if @use_gsl
|
|
116
|
+
bh=k_matrix.[](*bh)
|
|
117
|
+
t=k_matrix.[](*t)
|
|
118
|
+
#else
|
|
119
|
+
# bh=Matrix.rows(bh)
|
|
120
|
+
# t=Matrix.rows(t)
|
|
121
|
+
|
|
122
|
+
#end
|
|
116
123
|
else
|
|
117
124
|
num_pairs=num_pairs-1
|
|
118
125
|
@not_converged=false if num_pairs==0
|
|
@@ -49,7 +49,6 @@ module Statsample
|
|
|
49
49
|
# to the anchor location. For example, with the default left alignment,
|
|
50
50
|
# an angle of Math.PI / 2 causes text to proceed downwards. The default angle is zero.
|
|
51
51
|
attr_accessor :label_angle
|
|
52
|
-
|
|
53
52
|
attr_reader :x_scale, :y_scale
|
|
54
53
|
# Create a new Boxplot.
|
|
55
54
|
# Parameters: Hash of options
|
|
@@ -223,11 +222,11 @@ module Statsample
|
|
|
223
222
|
dot.bottom {|v| y_scale.scale(v)}
|
|
224
223
|
dot.title {|v| v}
|
|
225
224
|
end
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
end
|
|
225
|
+
end
|
|
229
226
|
end
|
|
227
|
+
vis
|
|
230
228
|
end
|
|
229
|
+
|
|
231
230
|
# Returns SVG with scatterplot
|
|
232
231
|
def to_svg
|
|
233
232
|
rp=rubyvis_panel
|
|
@@ -120,7 +120,7 @@ module Statsample
|
|
|
120
120
|
y_scale=Rubyvis::Scale.linear(@minimum_y, @maximum_y).range(0, height - margin_vert)
|
|
121
121
|
|
|
122
122
|
y_scale.nice
|
|
123
|
-
|
|
123
|
+
|
|
124
124
|
bins=@hist.bins.times.map {|i|
|
|
125
125
|
{
|
|
126
126
|
:low =>@hist.get_range(i)[0],
|
|
@@ -170,6 +170,7 @@ module Statsample
|
|
|
170
170
|
end
|
|
171
171
|
rubyvis_normal_distribution(pan) if @line_normal_distribution
|
|
172
172
|
end
|
|
173
|
+
vis
|
|
173
174
|
end
|
|
174
175
|
# Returns SVG with scatterplot
|
|
175
176
|
def to_svg
|
data/lib/statsample/matrix.rb
CHANGED
|
@@ -2,6 +2,9 @@ class ::Vector
|
|
|
2
2
|
def to_matrix
|
|
3
3
|
::Matrix.columns([self.to_a])
|
|
4
4
|
end
|
|
5
|
+
def to_vector
|
|
6
|
+
self
|
|
7
|
+
end
|
|
5
8
|
end
|
|
6
9
|
class ::Matrix
|
|
7
10
|
def to_matrix
|
|
@@ -28,18 +31,24 @@ class ::Matrix
|
|
|
28
31
|
if Statsample.has_gsl?
|
|
29
32
|
# Optimize eigenpairs of extendmatrix module using gsl
|
|
30
33
|
def eigenpairs
|
|
31
|
-
|
|
32
|
-
ep=eigval.size.times.map {|i|
|
|
33
|
-
[eigval[i], eigvec.get_col(i).to_a]
|
|
34
|
-
}
|
|
35
|
-
ep.sort{|a,b| a[0]<=>b[0]}.reverse
|
|
34
|
+
to_gsl.eigenpairs
|
|
36
35
|
end
|
|
37
36
|
end
|
|
38
37
|
|
|
39
38
|
def eigenvalues
|
|
40
|
-
|
|
39
|
+
eigenpairs.collect {|v| v[0]}
|
|
40
|
+
end
|
|
41
|
+
def eigenvectors
|
|
42
|
+
eigenpairs.collect {|v| v[1]}
|
|
43
|
+
end
|
|
44
|
+
def eigenvectors_matrix
|
|
45
|
+
Matrix.columns(eigenvectors)
|
|
41
46
|
end
|
|
47
|
+
|
|
48
|
+
|
|
42
49
|
|
|
50
|
+
|
|
51
|
+
|
|
43
52
|
def to_gsl
|
|
44
53
|
out=[]
|
|
45
54
|
self.row_size.times{|i|
|
|
@@ -55,18 +64,94 @@ module GSL
|
|
|
55
64
|
def to_matrix
|
|
56
65
|
::Matrix.columns([self.size.times.map {|i| self[i]}])
|
|
57
66
|
end
|
|
67
|
+
def to_ary
|
|
68
|
+
to_a
|
|
69
|
+
end
|
|
70
|
+
def to_gsl
|
|
71
|
+
self
|
|
72
|
+
end
|
|
58
73
|
end
|
|
59
74
|
end
|
|
60
75
|
class Matrix
|
|
61
76
|
def to_gsl
|
|
62
77
|
self
|
|
63
78
|
end
|
|
79
|
+
|
|
80
|
+
def to_dataset
|
|
81
|
+
f = (self.respond_to? :fields_y) ? fields_y : column_size.times.map {|i| _("VAR_%d") % (i+1) }
|
|
82
|
+
ds=Statsample::Dataset.new(f)
|
|
83
|
+
f.each do |ff|
|
|
84
|
+
ds[ff].type=:scale
|
|
85
|
+
ds[ff].name=ff
|
|
86
|
+
end
|
|
87
|
+
row_size.times {|i|
|
|
88
|
+
ds.add_case_array(self.row(i).to_a)
|
|
89
|
+
}
|
|
90
|
+
ds.update_valid_data
|
|
91
|
+
ds.name=self.name if self.respond_to? :name
|
|
92
|
+
ds
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
def row_size
|
|
96
|
+
size1
|
|
97
|
+
end
|
|
98
|
+
def column_size
|
|
99
|
+
size2
|
|
100
|
+
end
|
|
101
|
+
def determinant
|
|
102
|
+
det
|
|
103
|
+
end
|
|
104
|
+
def inverse
|
|
105
|
+
GSL::Linalg::LU.invert(self)
|
|
106
|
+
end
|
|
107
|
+
def eigenvalues
|
|
108
|
+
eigenpairs.collect {|v| v[0]}
|
|
109
|
+
end
|
|
110
|
+
def eigenvectors
|
|
111
|
+
eigenpairs.collect {|v| v[1]}
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
# Matrix sum of squares
|
|
115
|
+
def mssq
|
|
116
|
+
sum=0
|
|
117
|
+
to_v.each {|i| sum+=i**2}
|
|
118
|
+
sum
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
def eigenvectors_matrix
|
|
122
|
+
eigval, eigvec= GSL::Eigen.symmv(self)
|
|
123
|
+
GSL::Eigen::symmv_sort(eigval, eigvec, GSL::Eigen::SORT_VAL_DESC)
|
|
124
|
+
eigvec
|
|
125
|
+
end
|
|
126
|
+
def eigenpairs
|
|
127
|
+
eigval, eigvec= GSL::Eigen.symmv(self)
|
|
128
|
+
GSL::Eigen::symmv_sort(eigval, eigvec, GSL::Eigen::SORT_VAL_DESC)
|
|
129
|
+
@eigenpairs=eigval.size.times.map {|i|
|
|
130
|
+
[eigval[i],eigvec.get_col(i)]
|
|
131
|
+
}
|
|
132
|
+
end
|
|
133
|
+
|
|
134
|
+
#def eigenpairs_ruby
|
|
135
|
+
# self.to_matrix.eigenpairs_ruby
|
|
136
|
+
#end
|
|
137
|
+
def square?
|
|
138
|
+
size1==size2
|
|
139
|
+
end
|
|
64
140
|
def to_matrix
|
|
65
141
|
rows=self.size1
|
|
66
142
|
cols=self.size2
|
|
67
143
|
out=(0...rows).collect{|i| (0...cols).collect {|j| self[i,j]} }
|
|
68
144
|
::Matrix.rows(out)
|
|
69
145
|
end
|
|
146
|
+
def total_sum
|
|
147
|
+
sum=0
|
|
148
|
+
size1.times {|i|
|
|
149
|
+
size2.times {|j|
|
|
150
|
+
sum+=self[i,j]
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
sum
|
|
154
|
+
end
|
|
70
155
|
end
|
|
71
156
|
end
|
|
72
157
|
|
|
@@ -122,7 +207,7 @@ module Statsample
|
|
|
122
207
|
@@covariatematrix=0
|
|
123
208
|
|
|
124
209
|
# Get type of covariate matrix. Could be :covariance or :correlation
|
|
125
|
-
def
|
|
210
|
+
def _type
|
|
126
211
|
if row_size==column_size
|
|
127
212
|
if row_size.times.find {|i| self[i,i]!=1.0}
|
|
128
213
|
:covariance
|
|
@@ -134,11 +219,11 @@ module Statsample
|
|
|
134
219
|
end
|
|
135
220
|
|
|
136
221
|
end
|
|
137
|
-
def
|
|
222
|
+
def _type=(t)
|
|
138
223
|
@type=t
|
|
139
224
|
end
|
|
140
225
|
def correlation
|
|
141
|
-
if(
|
|
226
|
+
if(_type==:covariance)
|
|
142
227
|
matrix=Matrix.rows(row_size.times.collect { |i|
|
|
143
228
|
column_size.times.collect { |j|
|
|
144
229
|
if i==j
|
|
@@ -151,7 +236,7 @@ module Statsample
|
|
|
151
236
|
matrix.extend CovariateMatrix
|
|
152
237
|
matrix.fields_x=fields_x
|
|
153
238
|
matrix.fields_y=fields_y
|
|
154
|
-
matrix.
|
|
239
|
+
matrix._type=:correlation
|
|
155
240
|
matrix
|
|
156
241
|
else
|
|
157
242
|
self
|
|
@@ -192,12 +277,17 @@ module Statsample
|
|
|
192
277
|
columns||=rows
|
|
193
278
|
# Convert all labels on index
|
|
194
279
|
row_index=rows.collect {|v|
|
|
195
|
-
v.is_a?(Numeric) ? v : fields_x.index(v)
|
|
280
|
+
r=v.is_a?(Numeric) ? v : fields_x.index(v)
|
|
281
|
+
raise "Index #{v} doesn't exists on matrix" if r.nil?
|
|
282
|
+
r
|
|
196
283
|
}
|
|
197
284
|
column_index=columns.collect {|v|
|
|
198
|
-
v.is_a?(Numeric) ? v : fields_y.index(v)
|
|
285
|
+
r=v.is_a?(Numeric) ? v : fields_y.index(v)
|
|
286
|
+
raise "Index #{v} doesn't exists on matrix" if r.nil?
|
|
287
|
+
r
|
|
199
288
|
}
|
|
200
|
-
|
|
289
|
+
|
|
290
|
+
|
|
201
291
|
fx=row_index.collect {|v| fields_x[v]}
|
|
202
292
|
fy=column_index.collect {|v| fields_y[v]}
|
|
203
293
|
|
|
@@ -206,14 +296,14 @@ module Statsample
|
|
|
206
296
|
matrix.extend CovariateMatrix
|
|
207
297
|
matrix.fields_x=fx
|
|
208
298
|
matrix.fields_y=fy
|
|
209
|
-
matrix.
|
|
299
|
+
matrix._type=_type
|
|
210
300
|
matrix
|
|
211
301
|
end
|
|
212
302
|
def report_building(generator)
|
|
213
|
-
@name||= (
|
|
303
|
+
@name||= (_type==:correlation ? _("Correlation"):_("Covariance"))+_(" Matrix")
|
|
214
304
|
generator.table(:name=>@name, :header=>[""]+fields_y) do |t|
|
|
215
305
|
row_size.times {|i|
|
|
216
|
-
t.row([fields_x[i]]
|
|
306
|
+
t.row([fields_x[i]]+row(i).to_a.collect {|i1|
|
|
217
307
|
i1.nil? ? "--" : sprintf("%0.3f",i1).gsub("0.",".")
|
|
218
308
|
})
|
|
219
309
|
}
|
|
@@ -22,6 +22,9 @@ module Statsample
|
|
|
22
22
|
# * Logit Regression: Statsample::Regression::Binomial::Logit
|
|
23
23
|
# * Probit Regression: Statsample::Regression::Binomial::Probit
|
|
24
24
|
module Regression
|
|
25
|
+
|
|
26
|
+
LinearDependency=Class.new(Exception)
|
|
27
|
+
|
|
25
28
|
# Create a Statsample::Regression::Simple object, for simple regression
|
|
26
29
|
# * x: independent Vector
|
|
27
30
|
# * y: dependent Vector
|
|
@@ -78,7 +81,7 @@ module Statsample
|
|
|
78
81
|
if missing_data==:pairwise
|
|
79
82
|
Statsample::Regression::Multiple::RubyEngine.new(ds,y_var, opts)
|
|
80
83
|
else
|
|
81
|
-
if Statsample.has_gsl?
|
|
84
|
+
if Statsample.has_gsl? and false
|
|
82
85
|
Statsample::Regression::Multiple::GslEngine.new(ds, y_var, opts)
|
|
83
86
|
else
|
|
84
87
|
ds2=ds.dup_only_valid
|
|
@@ -12,6 +12,8 @@ module Statsample
|
|
|
12
12
|
attr_reader :valid_cases
|
|
13
13
|
# Number of total cases (dataset.cases)
|
|
14
14
|
attr_reader :total_cases
|
|
15
|
+
|
|
16
|
+
attr_accessor :digits
|
|
15
17
|
def self.univariate?
|
|
16
18
|
true
|
|
17
19
|
end
|
|
@@ -23,9 +25,15 @@ module Statsample
|
|
|
23
25
|
@y_var=y_var
|
|
24
26
|
@r2=nil
|
|
25
27
|
@name=_("Multiple Regression: %s over %s") % [ ds.fields.join(",") , @y_var]
|
|
26
|
-
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
opts_default={:digits=>3}
|
|
31
|
+
@opts=opts_default.merge opts
|
|
32
|
+
|
|
33
|
+
@opts.each{|k,v|
|
|
27
34
|
self.send("#{k}=",v) if self.respond_to? k
|
|
28
35
|
}
|
|
36
|
+
|
|
29
37
|
end
|
|
30
38
|
# Calculate F Test
|
|
31
39
|
def anova
|
|
@@ -159,7 +167,7 @@ module Statsample
|
|
|
159
167
|
columns.unshift([1.0]*@valid_cases)
|
|
160
168
|
x=Matrix.columns(columns)
|
|
161
169
|
matrix=((x.t*x)).inverse * mse
|
|
162
|
-
matrix.collect {|i| Math::sqrt(i) if i
|
|
170
|
+
matrix.collect {|i| Math::sqrt(i) if i>=0 }
|
|
163
171
|
end
|
|
164
172
|
# T for constant
|
|
165
173
|
def constant_t
|
|
@@ -170,24 +178,26 @@ module Statsample
|
|
|
170
178
|
estimated_variance_covariance_matrix[0,0]
|
|
171
179
|
end
|
|
172
180
|
def report_building(b)
|
|
181
|
+
di="%0.#{digits}f"
|
|
173
182
|
b.section(:name=>@name) do |g|
|
|
174
183
|
c=coeffs
|
|
175
184
|
g.text _("Engine: %s") % self.class
|
|
176
185
|
g.text(_("Cases(listwise)=%d(%d)") % [@total_cases, @valid_cases])
|
|
177
|
-
g.text _("R
|
|
178
|
-
g.text _("R^2
|
|
179
|
-
g.text _"R^2 Adj
|
|
180
|
-
g.text _("Std.Error R
|
|
186
|
+
g.text _("R=#{di}") % r
|
|
187
|
+
g.text _("R^2=#{di}") % r2
|
|
188
|
+
g.text _"R^2 Adj=#{di}" % r2_adjusted
|
|
189
|
+
g.text _("Std.Error R=#{di}") % se_estimate
|
|
181
190
|
|
|
182
|
-
g.text(_("Equation")+"="+ sprintf(
|
|
191
|
+
g.text(_("Equation")+"="+ sprintf(di,constant) +" + "+ @fields.collect {|k| sprintf("#{di}%s",c[k],k)}.join(' + ') )
|
|
183
192
|
|
|
184
193
|
g.parse_element(anova)
|
|
185
194
|
sc=standarized_coeffs
|
|
195
|
+
|
|
186
196
|
cse=coeffs_se
|
|
187
197
|
g.table(:name=>_("Beta coefficients"), :header=>%w{coeff b beta se t}.collect{|field| _(field)} ) do |t|
|
|
188
|
-
t.row([_("Constant"), sprintf(
|
|
198
|
+
t.row([_("Constant"), sprintf(di, constant), "-", constant_se.nil? ? "": sprintf(di, constant_se), constant_t.nil? ? "" : sprintf(di, constant_t)])
|
|
189
199
|
@fields.each do |f|
|
|
190
|
-
t.row([f, sprintf(
|
|
200
|
+
t.row([f, sprintf(di, c[f]), sprintf(di, sc[f]), sprintf(di, cse[f]), sprintf(di, c[f].quo(cse[f]))])
|
|
191
201
|
end
|
|
192
202
|
end
|
|
193
203
|
end
|
|
@@ -1,131 +1,132 @@
|
|
|
1
1
|
if Statsample.has_gsl?
|
|
2
|
-
module Statsample
|
|
3
|
-
module Regression
|
|
4
|
-
module Multiple
|
|
5
|
-
# Class for Multiple Regression Analysis
|
|
6
|
-
# Requires rbgsl and uses a listwise aproach.
|
|
7
|
-
# Slower on prediction of values than Alglib, because predict is ruby based.
|
|
8
|
-
# Better memory management on multiple (+1000) series of regression.
|
|
9
|
-
# If you need pairwise, use RubyEngine
|
|
10
|
-
# Example:
|
|
11
|
-
#
|
|
12
|
-
# @a=[1,3,2,4,3,5,4,6,5,7].to_vector(:scale)
|
|
13
|
-
# @b=[3,3,4,4,5,5,6,6,4,4].to_vector(:scale)
|
|
14
|
-
# @c=[11,22,30,40,50,65,78,79,99,100].to_vector(:scale)
|
|
15
|
-
# @y=[3,4,5,6,7,8,9,10,20,30].to_vector(:scale)
|
|
16
|
-
# ds={'a'=>@a,'b'=>@b,'c'=>@c,'y'=>@y}.to_dataset
|
|
17
|
-
# lr=Statsample::Regression::Multiple::GslEngine.new(ds,'y')
|
|
18
|
-
#
|
|
19
|
-
class GslEngine < BaseEngine
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
2
|
+
module Statsample
|
|
3
|
+
module Regression
|
|
4
|
+
module Multiple
|
|
5
|
+
# Class for Multiple Regression Analysis
|
|
6
|
+
# Requires rbgsl and uses a listwise aproach.
|
|
7
|
+
# Slower on prediction of values than Alglib, because predict is ruby based.
|
|
8
|
+
# Better memory management on multiple (+1000) series of regression.
|
|
9
|
+
# If you need pairwise, use RubyEngine
|
|
10
|
+
# Example:
|
|
11
|
+
#
|
|
12
|
+
# @a=[1,3,2,4,3,5,4,6,5,7].to_vector(:scale)
|
|
13
|
+
# @b=[3,3,4,4,5,5,6,6,4,4].to_vector(:scale)
|
|
14
|
+
# @c=[11,22,30,40,50,65,78,79,99,100].to_vector(:scale)
|
|
15
|
+
# @y=[3,4,5,6,7,8,9,10,20,30].to_vector(:scale)
|
|
16
|
+
# ds={'a'=>@a,'b'=>@b,'c'=>@c,'y'=>@y}.to_dataset
|
|
17
|
+
# lr=Statsample::Regression::Multiple::GslEngine.new(ds,'y')
|
|
18
|
+
#
|
|
19
|
+
class GslEngine < BaseEngine
|
|
20
|
+
def initialize(ds,y_var, opts=Hash.new)
|
|
21
|
+
super
|
|
22
|
+
@ds=ds.dup_only_valid
|
|
23
|
+
@ds_valid=@ds
|
|
24
|
+
@valid_cases=@ds_valid.cases
|
|
25
|
+
@dy=@ds[@y_var]
|
|
26
|
+
@ds_indep=ds.dup(ds.fields-[y_var])
|
|
27
|
+
# Create a custom matrix
|
|
28
|
+
columns=[]
|
|
29
|
+
@fields=[]
|
|
30
|
+
max_deps = GSL::Matrix.alloc(@ds.cases, @ds.fields.size)
|
|
31
|
+
constant_col=@ds.fields.size-1
|
|
32
|
+
for i in 0...@ds.cases
|
|
33
|
+
max_deps.set(i,constant_col,1)
|
|
34
|
+
end
|
|
35
|
+
j=0
|
|
36
|
+
@ds.fields.each{|f|
|
|
37
|
+
if f!=@y_var
|
|
38
|
+
@ds[f].each_index{|i1|
|
|
39
|
+
max_deps.set(i1,j,@ds[f][i1])
|
|
40
|
+
}
|
|
41
|
+
columns.push(@ds[f].to_a)
|
|
42
|
+
@fields.push(f)
|
|
43
|
+
j+=1
|
|
44
|
+
end
|
|
40
45
|
}
|
|
41
|
-
columns.
|
|
42
|
-
@
|
|
43
|
-
|
|
46
|
+
@dep_columns=columns.dup
|
|
47
|
+
@lr_s=nil
|
|
48
|
+
c, @cov, @chisq, @status = GSL::MultiFit.linear(max_deps, @dy.gsl)
|
|
49
|
+
@constant=c[constant_col]
|
|
50
|
+
@coeffs_a=c.to_a.slice(0...constant_col)
|
|
51
|
+
@coeffs=assign_names(@coeffs_a)
|
|
52
|
+
c=nil
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
def _dump(i)
|
|
56
|
+
Marshal.dump({'ds'=>@ds,'y_var'=>@y_var})
|
|
57
|
+
end
|
|
58
|
+
def self._load(data)
|
|
59
|
+
h=Marshal.load(data)
|
|
60
|
+
self.new(h['ds'], h['y_var'])
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
def coeffs
|
|
64
|
+
@coeffs
|
|
65
|
+
end
|
|
66
|
+
# Coefficients using a constant
|
|
67
|
+
# Based on http://www.xycoon.com/ols1.htm
|
|
68
|
+
def matrix_resolution
|
|
69
|
+
columns=@dep_columns.dup.map {|xi| xi.map{|i| i.to_f}}
|
|
70
|
+
columns.unshift([1.0]*@ds.cases)
|
|
71
|
+
y=Matrix.columns([@dy.data.map {|i| i.to_f}])
|
|
72
|
+
x=Matrix.columns(columns)
|
|
73
|
+
xt=x.t
|
|
74
|
+
matrix=((xt*x)).inverse*xt
|
|
75
|
+
matrix*y
|
|
76
|
+
end
|
|
77
|
+
def r2
|
|
78
|
+
r**2
|
|
79
|
+
end
|
|
80
|
+
def r
|
|
81
|
+
Bivariate::pearson(@dy, predicted)
|
|
82
|
+
end
|
|
83
|
+
def sst
|
|
84
|
+
@dy.ss
|
|
85
|
+
end
|
|
86
|
+
def constant
|
|
87
|
+
@constant
|
|
88
|
+
end
|
|
89
|
+
def standarized_coeffs
|
|
90
|
+
l=lr_s
|
|
91
|
+
l.coeffs
|
|
92
|
+
end
|
|
93
|
+
def lr_s
|
|
94
|
+
if @lr_s.nil?
|
|
95
|
+
build_standarized
|
|
96
|
+
end
|
|
97
|
+
@lr_s
|
|
98
|
+
end
|
|
99
|
+
def build_standarized
|
|
100
|
+
@ds_s=@ds.standarize
|
|
101
|
+
@lr_s=GslEngine.new(@ds_s,@y_var)
|
|
102
|
+
end
|
|
103
|
+
def process_s(v)
|
|
104
|
+
lr_s.process(v)
|
|
105
|
+
end
|
|
106
|
+
# ???? Not equal to SPSS output
|
|
107
|
+
def standarized_residuals
|
|
108
|
+
res=residuals
|
|
109
|
+
red_sd=residuals.sds
|
|
110
|
+
res.collect {|v|
|
|
111
|
+
v.quo(red_sd)
|
|
112
|
+
}.to_vector(:scale)
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
# Standard error for coeffs
|
|
116
|
+
def coeffs_se
|
|
117
|
+
out={}
|
|
118
|
+
evcm=estimated_variance_covariance_matrix
|
|
119
|
+
@ds_valid.fields.each_with_index do |f,i|
|
|
120
|
+
|
|
121
|
+
mi=i+1
|
|
122
|
+
next if f==@y_var
|
|
123
|
+
out[f]=evcm[mi,mi]
|
|
124
|
+
end
|
|
125
|
+
out
|
|
126
|
+
end
|
|
127
|
+
|
|
44
128
|
end
|
|
45
|
-
|
|
46
|
-
@dep_columns=columns.dup
|
|
47
|
-
@lr_s=nil
|
|
48
|
-
c, @cov, @chisq, @status = GSL::MultiFit.linear(max_deps, @dy.gsl)
|
|
49
|
-
@constant=c[constant_col]
|
|
50
|
-
@coeffs_a=c.to_a.slice(0...constant_col)
|
|
51
|
-
@coeffs=assign_names(@coeffs_a)
|
|
52
|
-
c=nil
|
|
53
|
-
end
|
|
54
|
-
|
|
55
|
-
def _dump(i)
|
|
56
|
-
Marshal.dump({'ds'=>@ds,'y_var'=>@y_var})
|
|
57
|
-
end
|
|
58
|
-
def self._load(data)
|
|
59
|
-
h=Marshal.load(data)
|
|
60
|
-
self.new(h['ds'], h['y_var'])
|
|
61
|
-
end
|
|
62
|
-
|
|
63
|
-
def coeffs
|
|
64
|
-
@coeffs
|
|
65
|
-
end
|
|
66
|
-
# Coefficients using a constant
|
|
67
|
-
# Based on http://www.xycoon.com/ols1.htm
|
|
68
|
-
def matrix_resolution
|
|
69
|
-
columns=@dep_columns.dup.map {|xi| xi.map{|i| i.to_f}}
|
|
70
|
-
columns.unshift([1.0]*@ds.cases)
|
|
71
|
-
y=Matrix.columns([@dy.data.map {|i| i.to_f}])
|
|
72
|
-
x=Matrix.columns(columns)
|
|
73
|
-
xt=x.t
|
|
74
|
-
matrix=((xt*x)).inverse*xt
|
|
75
|
-
matrix*y
|
|
76
|
-
end
|
|
77
|
-
def r2
|
|
78
|
-
r**2
|
|
79
|
-
end
|
|
80
|
-
def r
|
|
81
|
-
Bivariate::pearson(@dy, predicted)
|
|
82
|
-
end
|
|
83
|
-
def sst
|
|
84
|
-
@dy.ss
|
|
85
|
-
end
|
|
86
|
-
def constant
|
|
87
|
-
@constant
|
|
88
|
-
end
|
|
89
|
-
def standarized_coeffs
|
|
90
|
-
l=lr_s
|
|
91
|
-
l.coeffs
|
|
92
|
-
end
|
|
93
|
-
def lr_s
|
|
94
|
-
if @lr_s.nil?
|
|
95
|
-
build_standarized
|
|
96
|
-
end
|
|
97
|
-
@lr_s
|
|
98
|
-
end
|
|
99
|
-
def build_standarized
|
|
100
|
-
@ds_s=@ds.standarize
|
|
101
|
-
@lr_s=GslEngine.new(@ds_s,@y_var)
|
|
102
|
-
end
|
|
103
|
-
def process_s(v)
|
|
104
|
-
lr_s.process(v)
|
|
105
|
-
end
|
|
106
|
-
# ???? Not equal to SPSS output
|
|
107
|
-
def standarized_residuals
|
|
108
|
-
res=residuals
|
|
109
|
-
red_sd=residuals.sds
|
|
110
|
-
res.collect {|v|
|
|
111
|
-
v.quo(red_sd)
|
|
112
|
-
}.to_vector(:scale)
|
|
113
|
-
end
|
|
114
|
-
|
|
115
|
-
# Standard error for coeffs
|
|
116
|
-
def coeffs_se
|
|
117
|
-
out={}
|
|
118
|
-
evcm=estimated_variance_covariance_matrix
|
|
119
|
-
@ds_valid.fields.each_with_index do |f,i|
|
|
120
|
-
mi=i+1
|
|
121
|
-
next if f==@y_var
|
|
122
|
-
out[f]=evcm[mi,mi]
|
|
129
|
+
end
|
|
123
130
|
end
|
|
124
|
-
|
|
125
|
-
end
|
|
126
|
-
|
|
127
|
-
end
|
|
128
|
-
end
|
|
129
|
-
end
|
|
130
|
-
end # for Statsample
|
|
131
|
+
end # for Statsample
|
|
131
132
|
end # for if
|