statsample 0.6.5 → 0.6.7
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +15 -0
- data/Manifest.txt +6 -0
- data/README.txt +30 -12
- data/Rakefile +91 -0
- data/demo/levene.rb +9 -0
- data/demo/multiple_regression.rb +1 -7
- data/demo/polychoric.rb +1 -0
- data/demo/principal_axis.rb +8 -0
- data/lib/distribution/f.rb +22 -22
- data/lib/spss.rb +99 -99
- data/lib/statsample/bivariate/polychoric.rb +32 -22
- data/lib/statsample/bivariate/tetrachoric.rb +212 -207
- data/lib/statsample/bivariate.rb +6 -6
- data/lib/statsample/codification.rb +65 -65
- data/lib/statsample/combination.rb +60 -59
- data/lib/statsample/converter/csv19.rb +12 -12
- data/lib/statsample/converters.rb +1 -1
- data/lib/statsample/dataset.rb +93 -36
- data/lib/statsample/dominanceanalysis/bootstrap.rb +66 -3
- data/lib/statsample/dominanceanalysis.rb +5 -6
- data/lib/statsample/factor/pca.rb +41 -11
- data/lib/statsample/factor/principalaxis.rb +105 -29
- data/lib/statsample/factor/rotation.rb +20 -3
- data/lib/statsample/factor.rb +1 -1
- data/lib/statsample/graph/gdchart.rb +13 -13
- data/lib/statsample/graph/svggraph.rb +166 -167
- data/lib/statsample/matrix.rb +22 -12
- data/lib/statsample/mle/logit.rb +3 -2
- data/lib/statsample/mle/probit.rb +7 -5
- data/lib/statsample/mle.rb +4 -2
- data/lib/statsample/multiset.rb +125 -124
- data/lib/statsample/permutation.rb +2 -1
- data/lib/statsample/regression/binomial/logit.rb +4 -3
- data/lib/statsample/regression/binomial/probit.rb +2 -1
- data/lib/statsample/regression/binomial.rb +62 -81
- data/lib/statsample/regression/multiple/baseengine.rb +1 -1
- data/lib/statsample/regression/multiple/gslengine.rb +1 -1
- data/lib/statsample/regression/multiple/matrixengine.rb +12 -6
- data/lib/statsample/regression/multiple.rb +15 -42
- data/lib/statsample/regression/simple.rb +93 -78
- data/lib/statsample/regression.rb +74 -2
- data/lib/statsample/reliability.rb +117 -120
- data/lib/statsample/srs.rb +156 -153
- data/lib/statsample/test/levene.rb +90 -0
- data/lib/statsample/test/umannwhitney.rb +25 -9
- data/lib/statsample/test.rb +2 -0
- data/lib/statsample/vector.rb +388 -413
- data/lib/statsample.rb +74 -30
- data/po/es/statsample.mo +0 -0
- data/test/test_bivariate.rb +5 -4
- data/test/test_combination.rb +1 -1
- data/test/test_dataset.rb +2 -2
- data/test/test_factor.rb +53 -6
- data/test/test_gsl.rb +1 -1
- data/test/test_mle.rb +1 -1
- data/test/test_regression.rb +18 -33
- data/test/test_statistics.rb +15 -33
- data/test/test_stest.rb +35 -0
- data/test/test_svg_graph.rb +2 -2
- data/test/test_vector.rb +331 -333
- metadata +38 -11
@@ -101,7 +101,7 @@ class MatrixEngine < BaseEngine
|
|
101
101
|
# Get R^2 for the regression
|
102
102
|
# Equal to
|
103
103
|
# * 1-(|R| / |R_x|) or
|
104
|
-
# * Sum(b_i*r_yi)
|
104
|
+
# * Sum(b_i*r_yi) <- used
|
105
105
|
def r2
|
106
106
|
@n_predictors.times.inject(0) {|ac,i| ac+@coeffs_stan[i]* @matrix_y[i,0]}
|
107
107
|
end
|
@@ -113,13 +113,16 @@ class MatrixEngine < BaseEngine
|
|
113
113
|
c=coeffs
|
114
114
|
@y_mean - @fields.inject(0){|a,k| a + (c[k] * @x_mean[k])}
|
115
115
|
end
|
116
|
+
# Hash of b or raw coefficients
|
116
117
|
def coeffs
|
117
118
|
assign_names(@coeffs)
|
118
119
|
end
|
120
|
+
# Hash of beta or standardized coefficients
|
121
|
+
|
119
122
|
def standarized_coeffs
|
120
123
|
assign_names(@coeffs_stan)
|
121
124
|
end
|
122
|
-
|
125
|
+
# Total sum of squares
|
123
126
|
def sst
|
124
127
|
@y_sd**2*(cases-1.0)
|
125
128
|
end
|
@@ -134,9 +137,11 @@ class MatrixEngine < BaseEngine
|
|
134
137
|
end
|
135
138
|
|
136
139
|
# Tolerance for a given variable
|
137
|
-
# defined as (1-
|
140
|
+
# defined as (1-R^2) of regression of other independent variables
|
138
141
|
# over the selected
|
139
|
-
#
|
142
|
+
# Reference:
|
143
|
+
#
|
144
|
+
# * http://talkstats.com/showthread.php?t=5056
|
140
145
|
def tolerance(var)
|
141
146
|
lr=Statsample::Regression::Multiple::MatrixEngine.new(@matrix_x, var)
|
142
147
|
1-lr.r2
|
@@ -146,7 +151,8 @@ class MatrixEngine < BaseEngine
|
|
146
151
|
# * Tolerance of the coefficients: Higher tolerances imply higher error
|
147
152
|
# * Higher r2 implies lower error
|
148
153
|
|
149
|
-
# Reference:
|
154
|
+
# Reference:
|
155
|
+
# * Cohen et al. (2003). Applied Multiple Regression / Correlation Analysis for the Behavioral Sciences
|
150
156
|
#
|
151
157
|
def coeffs_se
|
152
158
|
out={}
|
@@ -188,7 +194,7 @@ class MatrixEngine < BaseEngine
|
|
188
194
|
matrix.collect {|i| Math::sqrt(i) if i>0 }[0,0]
|
189
195
|
end
|
190
196
|
|
191
|
-
def to_reportbuilder(generator)
|
197
|
+
def to_reportbuilder(generator) # :nodoc:
|
192
198
|
anchor=generator.add_toc_entry(_("Multiple Regression: ")+@name)
|
193
199
|
generator.add_html "<div class='multiple-regression'>#{@name}<a name='#{anchor}'></a>"
|
194
200
|
c=coeffs
|
@@ -1,9 +1,7 @@
|
|
1
1
|
require 'statsample/regression/multiple/baseengine'
|
2
2
|
module Statsample
|
3
3
|
module Regression
|
4
|
-
# Module for
|
5
|
-
#
|
6
|
-
# You can call Statsample::Regression::Multiple.listwise, Statsample::Regression::Multiple.pairwise or instance directly the engines.
|
4
|
+
# Module for OLS Multiple Regression Analysis.
|
7
5
|
#
|
8
6
|
# Use:.
|
9
7
|
#
|
@@ -13,7 +11,7 @@ module Statsample
|
|
13
11
|
# c=1000.times.collect {rand}.to_scale
|
14
12
|
# ds={'a'=>a,'b'=>b,'c'=>c}.to_dataset
|
15
13
|
# ds['y']=ds.collect{|row| row['a']*5+row['b']*3+row['c']*2+rand()}
|
16
|
-
# lr=Statsample::Regression
|
14
|
+
# lr=Statsample::Regression.multiple(ds,'y')
|
17
15
|
# puts lr.summary
|
18
16
|
# Summary for regression of a,b,c over y
|
19
17
|
# *************************************************************
|
@@ -42,29 +40,6 @@ module Statsample
|
|
42
40
|
# -----------------------------------------------
|
43
41
|
#
|
44
42
|
module Multiple
|
45
|
-
# Creates an object for listwise regression.
|
46
|
-
# Alglib is faster, so is prefered over GSL
|
47
|
-
# lr=Statsample::Regression::Multiple.listwise(ds,'y')
|
48
|
-
def self.listwise(ds,y_var)
|
49
|
-
if HAS_ALGIB
|
50
|
-
AlglibEngine.new(ds,y_var)
|
51
|
-
elsif HAS_GSL
|
52
|
-
GslEngine.new(ds,y_var)
|
53
|
-
else
|
54
|
-
ds2=ds.dup_only_valid
|
55
|
-
RubyEngine.new(ds2,y_var)
|
56
|
-
end
|
57
|
-
end
|
58
|
-
|
59
|
-
# Creates an object for pairwise regression
|
60
|
-
# For now, always retrieves a RubyEngine
|
61
|
-
# lr=Statsample::Regression::Multiple.listwise(ds,'y')
|
62
|
-
def self.pairwise(ds,y_var)
|
63
|
-
RubyEngine.new(ds,y_var)
|
64
|
-
end
|
65
|
-
def self.listwise_by_exp(ds,exp)
|
66
|
-
raise "Not implemented yet"
|
67
|
-
end
|
68
43
|
# Obtain r2 for regressors
|
69
44
|
def self.r2_from_matrices(rxx,rxy)
|
70
45
|
matrix=(rxy.transpose*rxx.inverse*rxy)
|
@@ -76,21 +51,19 @@ module Statsample
|
|
76
51
|
0.0
|
77
52
|
end
|
78
53
|
def initialize(matrix,y_var, opts=Hash.new)
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
54
|
+
matrix.extend Statsample::CovariateMatrix
|
55
|
+
@matrix=matrix
|
56
|
+
@fields=matrix.fields-y_var
|
57
|
+
@y_var=y_var
|
58
|
+
@q=@y_var.size
|
59
|
+
@matrix_cor=matrix.correlation
|
60
|
+
@matrix_cor_xx = @matrix_cor.submatrix(@fields)
|
61
|
+
@matrix_cor_yy = @matrix_cor.submatrix(y_var, y_var)
|
62
|
+
|
63
|
+
@sxx = @matrix.submatrix(@fields)
|
64
|
+
@syy = @matrix.submatrix(y_var, y_var)
|
65
|
+
@sxy = @matrix.submatrix(@fields, y_var)
|
66
|
+
@syx = @sxy.t
|
94
67
|
end
|
95
68
|
|
96
69
|
def r2yx
|
@@ -1,81 +1,96 @@
|
|
1
1
|
module Statsample
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
2
|
+
module Regression
|
3
|
+
# Class for calculation of linear regressions with form
|
4
|
+
# y = a+bx
|
5
|
+
# To create a Statsample::Regression::Simple object:
|
6
|
+
# * <tt> Simple.new_from_dataset(ds,x,y)</tt>
|
7
|
+
# * <tt> Simple.new_from_vectors(vx,vy)</tt>
|
8
|
+
# * <tt> Simple.new_from_gsl(gsl) </tt>
|
9
|
+
#
|
10
|
+
class Simple
|
11
|
+
attr_accessor :a,:b,:cov00, :cov01, :covx1, :chisq, :status
|
12
|
+
|
13
|
+
def initialize(init_method, *argv)
|
14
|
+
self.send(init_method, *argv)
|
15
|
+
end
|
16
|
+
private_class_method :new
|
17
|
+
# Obtain y value given x value
|
18
|
+
# y=a+bx
|
19
|
+
|
20
|
+
def y(val_x)
|
21
|
+
@a+@b*val_x
|
22
|
+
end
|
23
|
+
# Obtain x value given y value
|
24
|
+
# x=(y-a)/b
|
25
|
+
def x(val_y)
|
26
|
+
(val_y-@a) / @b.to_f
|
27
|
+
end
|
28
|
+
# Sum of square error
|
29
|
+
def sse
|
30
|
+
(0...@vx.size).inject(0) {|acum,i| acum+((@vy[i]-y(@vx[i]))**2)
|
31
|
+
}
|
32
|
+
end
|
33
|
+
def standard_error
|
34
|
+
Math::sqrt(sse / (@vx.size-2).to_f)
|
35
|
+
end
|
36
|
+
# Sum of square regression
|
37
|
+
def ssr
|
38
|
+
vy_mean=@vy.mean
|
39
|
+
(0...@vx.size).inject(0) {|a,i|
|
40
|
+
a+((y(@vx[i])-vy_mean)**2)
|
41
|
+
}
|
42
|
+
|
43
|
+
end
|
44
|
+
# Sum of square total
|
45
|
+
def sst
|
46
|
+
@vy.sum_of_squared_deviation
|
47
|
+
end
|
48
|
+
# Value of r
|
49
|
+
def r
|
50
|
+
@b * (@vx.sds / @vy.sds)
|
51
|
+
end
|
52
|
+
# Value of r^2
|
53
|
+
def r2
|
54
|
+
r**2
|
55
|
+
end
|
56
|
+
class << self
|
57
|
+
# Create a regression object given an array with the following parameters:
|
58
|
+
# <tt>a,b,cov00, cov01, covx1, chisq, status</tt>
|
59
|
+
# Useful to obtain x and y values with a and b values.
|
60
|
+
def new_from_gsl(ar)
|
61
|
+
new(:init_gsl, *ar)
|
62
|
+
end
|
63
|
+
# Create a simple regression using two vectors
|
64
|
+
def new_from_vectors(vx,vy)
|
65
|
+
new(:init_vectors,vx,vy)
|
66
|
+
end
|
67
|
+
# Create a simple regression using a dataset and two vector names.
|
68
|
+
def new_from_dataset(ds,x,y)
|
69
|
+
new(:init_vectors,ds[x],ds[y])
|
70
|
+
end
|
71
|
+
end
|
72
|
+
def init_vectors(vx,vy)
|
73
|
+
@vx,@vy=Statsample.only_valid(vx,vy)
|
74
|
+
x_m=@vx.mean
|
75
|
+
y_m=@vy.mean
|
76
|
+
num=den=0
|
77
|
+
(0...@vx.size).each {|i|
|
78
|
+
num+=(@vx[i]-x_m)*(@vy[i]-y_m)
|
79
|
+
den+=(@vx[i]-x_m)**2
|
80
|
+
}
|
81
|
+
@b=num.to_f/den
|
82
|
+
@a=y_m - @b*x_m
|
83
|
+
end
|
84
|
+
def init_gsl(a,b,cov00, cov01, covx1, chisq, status)
|
85
|
+
@a=a
|
86
|
+
@b=b
|
87
|
+
@cov00=cov00
|
88
|
+
@cov01=cov01
|
89
|
+
@covx1=covx1
|
90
|
+
@chisq=chisq
|
91
|
+
@status=status
|
92
|
+
end
|
93
|
+
private :init_vectors, :init_gsl
|
80
94
|
end
|
95
|
+
end
|
81
96
|
end
|
@@ -2,7 +2,6 @@ require 'statsample/regression/simple'
|
|
2
2
|
require 'statsample/regression/multiple'
|
3
3
|
|
4
4
|
require 'statsample/regression/multiple/matrixengine'
|
5
|
-
require 'statsample/regression/multiple/alglibengine'
|
6
5
|
require 'statsample/regression/multiple/rubyengine'
|
7
6
|
require 'statsample/regression/multiple/gslengine'
|
8
7
|
|
@@ -11,7 +10,80 @@ require 'statsample/regression/binomial/logit'
|
|
11
10
|
require 'statsample/regression/binomial/probit'
|
12
11
|
|
13
12
|
module Statsample
|
14
|
-
# Module for regression procedures.
|
13
|
+
# = Module for regression procedures.
|
14
|
+
# Use the methods on this module to generate
|
15
|
+
# analysis.
|
16
|
+
# If you need more control, you can
|
17
|
+
# directly create and control the objects that compute
|
18
|
+
# the regressions.
|
19
|
+
#
|
20
|
+
# * Simple Regression : Statsample::Regression::Simple
|
21
|
+
# * Multiple Regression: Statsample::Regression::Multiple
|
22
|
+
# * Logit Regression: Statsample::Regression::Binomial::Logit
|
23
|
+
# * Probit Regression: Statsample::Regression::Binomial::Probit
|
15
24
|
module Regression
|
25
|
+
# Create a Statsample::Regression::Simple object, for simple regression
|
26
|
+
# * x: independent Vector
|
27
|
+
# * y: dependent Vector
|
28
|
+
# <b>Usage:</b>
|
29
|
+
# x=100.times.collect {|i| rand(100)}.to_scale
|
30
|
+
# y=100.times.collect {|i| 2+x[i]*2+rand()}.to_scale
|
31
|
+
# sr=Statsample::Regression.simple(x,y)
|
32
|
+
# sr.a
|
33
|
+
# => 2.51763295177808
|
34
|
+
# sr.b
|
35
|
+
# => 1.99973746599856
|
36
|
+
# sr.r
|
37
|
+
# => 0.999987881153254
|
38
|
+
|
39
|
+
def self.simple(x,y)
|
40
|
+
Statsample::Regression::Simple.new_from_vectors(x,y)
|
41
|
+
end
|
42
|
+
# Create a Binomial::Logit object, for logit regression.
|
43
|
+
# * ds:: Dataset
|
44
|
+
# * y:: Name of dependent vector
|
45
|
+
# <b>Usage</b>
|
46
|
+
# dataset=Statsample::CSV.read("data.csv")
|
47
|
+
# lr=Statsample::Regression.logit(dataset,'y')
|
48
|
+
#
|
49
|
+
def self.logit(ds,y_var)
|
50
|
+
Statsample::Regression::Binomial::Logit.new(ds,y_var)
|
51
|
+
end
|
52
|
+
# Create a Binomial::Probit object, for probit regression
|
53
|
+
# * ds:: Dataset
|
54
|
+
# * y:: Name of dependent vector
|
55
|
+
# <b>Usage</b>
|
56
|
+
# dataset=Statsample::CSV.read("data.csv")
|
57
|
+
# lr=Statsample::Regression.probit(dataset,'y')
|
58
|
+
#
|
59
|
+
|
60
|
+
def self.probit(ds,y_var)
|
61
|
+
Statsample::Regression::Binomial::Probit.new(ds,y_var)
|
62
|
+
end
|
63
|
+
|
64
|
+
|
65
|
+
# Creates one of the Statsample::Regression::Multiple objects,
|
66
|
+
# for OLS multiple regression.
|
67
|
+
# Parameters:
|
68
|
+
# * ds: Dataset.
|
69
|
+
# * y: Name of dependent variable.
|
70
|
+
# * missing_data: Could be
|
71
|
+
# * :listwise: deletes cases with one or more missing values (default).
|
72
|
+
# * :pairwise: uses correlation matrix. Use with caution.
|
73
|
+
#
|
74
|
+
# <b>Usage:</b>
|
75
|
+
# lr=Statsample::Regression::multiple(ds,'y')
|
76
|
+
def self.multiple(ds,y_var, missing_data=:listwise)
|
77
|
+
if missing_data==:pairwise
|
78
|
+
RubyEngine.new(ds,y_var)
|
79
|
+
else
|
80
|
+
if Statsample.has_gsl?
|
81
|
+
Statsample::Regression::Multiple::GslEngine.new(ds,y_var)
|
82
|
+
else
|
83
|
+
ds2=ds.dup_only_valid
|
84
|
+
Statsample::Regression::Multiple::RubyEngine.new(ds2,y_var)
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
16
88
|
end
|
17
89
|
end
|