statsample 0.6.5 → 0.6.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. data/History.txt +15 -0
  2. data/Manifest.txt +6 -0
  3. data/README.txt +30 -12
  4. data/Rakefile +91 -0
  5. data/demo/levene.rb +9 -0
  6. data/demo/multiple_regression.rb +1 -7
  7. data/demo/polychoric.rb +1 -0
  8. data/demo/principal_axis.rb +8 -0
  9. data/lib/distribution/f.rb +22 -22
  10. data/lib/spss.rb +99 -99
  11. data/lib/statsample/bivariate/polychoric.rb +32 -22
  12. data/lib/statsample/bivariate/tetrachoric.rb +212 -207
  13. data/lib/statsample/bivariate.rb +6 -6
  14. data/lib/statsample/codification.rb +65 -65
  15. data/lib/statsample/combination.rb +60 -59
  16. data/lib/statsample/converter/csv19.rb +12 -12
  17. data/lib/statsample/converters.rb +1 -1
  18. data/lib/statsample/dataset.rb +93 -36
  19. data/lib/statsample/dominanceanalysis/bootstrap.rb +66 -3
  20. data/lib/statsample/dominanceanalysis.rb +5 -6
  21. data/lib/statsample/factor/pca.rb +41 -11
  22. data/lib/statsample/factor/principalaxis.rb +105 -29
  23. data/lib/statsample/factor/rotation.rb +20 -3
  24. data/lib/statsample/factor.rb +1 -1
  25. data/lib/statsample/graph/gdchart.rb +13 -13
  26. data/lib/statsample/graph/svggraph.rb +166 -167
  27. data/lib/statsample/matrix.rb +22 -12
  28. data/lib/statsample/mle/logit.rb +3 -2
  29. data/lib/statsample/mle/probit.rb +7 -5
  30. data/lib/statsample/mle.rb +4 -2
  31. data/lib/statsample/multiset.rb +125 -124
  32. data/lib/statsample/permutation.rb +2 -1
  33. data/lib/statsample/regression/binomial/logit.rb +4 -3
  34. data/lib/statsample/regression/binomial/probit.rb +2 -1
  35. data/lib/statsample/regression/binomial.rb +62 -81
  36. data/lib/statsample/regression/multiple/baseengine.rb +1 -1
  37. data/lib/statsample/regression/multiple/gslengine.rb +1 -1
  38. data/lib/statsample/regression/multiple/matrixengine.rb +12 -6
  39. data/lib/statsample/regression/multiple.rb +15 -42
  40. data/lib/statsample/regression/simple.rb +93 -78
  41. data/lib/statsample/regression.rb +74 -2
  42. data/lib/statsample/reliability.rb +117 -120
  43. data/lib/statsample/srs.rb +156 -153
  44. data/lib/statsample/test/levene.rb +90 -0
  45. data/lib/statsample/test/umannwhitney.rb +25 -9
  46. data/lib/statsample/test.rb +2 -0
  47. data/lib/statsample/vector.rb +388 -413
  48. data/lib/statsample.rb +74 -30
  49. data/po/es/statsample.mo +0 -0
  50. data/test/test_bivariate.rb +5 -4
  51. data/test/test_combination.rb +1 -1
  52. data/test/test_dataset.rb +2 -2
  53. data/test/test_factor.rb +53 -6
  54. data/test/test_gsl.rb +1 -1
  55. data/test/test_mle.rb +1 -1
  56. data/test/test_regression.rb +18 -33
  57. data/test/test_statistics.rb +15 -33
  58. data/test/test_stest.rb +35 -0
  59. data/test/test_svg_graph.rb +2 -2
  60. data/test/test_vector.rb +331 -333
  61. metadata +38 -11
@@ -1,4 +1,4 @@
1
- if HAS_GSL
1
+ if Statsample.has_gsl?
2
2
  module Statsample
3
3
  module Regression
4
4
  module Multiple
@@ -101,7 +101,7 @@ class MatrixEngine < BaseEngine
101
101
  # Get R^2 for the regression
102
102
  # Equal to
103
103
  # * 1-(|R| / |R_x|) or
104
- # * Sum(b_i*r_yi)
104
+ # * Sum(b_i*r_yi) <- used
105
105
  def r2
106
106
  @n_predictors.times.inject(0) {|ac,i| ac+@coeffs_stan[i]* @matrix_y[i,0]}
107
107
  end
@@ -113,13 +113,16 @@ class MatrixEngine < BaseEngine
113
113
  c=coeffs
114
114
  @y_mean - @fields.inject(0){|a,k| a + (c[k] * @x_mean[k])}
115
115
  end
116
+ # Hash of b or raw coefficients
116
117
  def coeffs
117
118
  assign_names(@coeffs)
118
119
  end
120
+ # Hash of beta or standardized coefficients
121
+
119
122
  def standarized_coeffs
120
123
  assign_names(@coeffs_stan)
121
124
  end
122
-
125
+ # Total sum of squares
123
126
  def sst
124
127
  @y_sd**2*(cases-1.0)
125
128
  end
@@ -134,9 +137,11 @@ class MatrixEngine < BaseEngine
134
137
  end
135
138
 
136
139
  # Tolerance for a given variable
137
- # defined as (1-r2) of regression of other independent variables
140
+ # defined as (1-R^2) of regression of other independent variables
138
141
  # over the selected
139
- # http://talkstats.com/showthread.php?t=5056
142
+ # Reference:
143
+ #
144
+ # * http://talkstats.com/showthread.php?t=5056
140
145
  def tolerance(var)
141
146
  lr=Statsample::Regression::Multiple::MatrixEngine.new(@matrix_x, var)
142
147
  1-lr.r2
@@ -146,7 +151,8 @@ class MatrixEngine < BaseEngine
146
151
  # * Tolerance of the coefficients: Higher tolerances imply higher error
147
152
  # * Higher r2 implies lower error
148
153
 
149
- # Reference: Cohen et al. (2003). Applied Multiple Reggression / Correlation Analysis for the Behavioral Sciences
154
+ # Reference:
155
+ # * Cohen et al. (2003). Applied Multiple Regression / Correlation Analysis for the Behavioral Sciences
150
156
  #
151
157
  def coeffs_se
152
158
  out={}
@@ -188,7 +194,7 @@ class MatrixEngine < BaseEngine
188
194
  matrix.collect {|i| Math::sqrt(i) if i>0 }[0,0]
189
195
  end
190
196
 
191
- def to_reportbuilder(generator)
197
+ def to_reportbuilder(generator) # :nodoc:
192
198
  anchor=generator.add_toc_entry(_("Multiple Regression: ")+@name)
193
199
  generator.add_html "<div class='multiple-regression'>#{@name}<a name='#{anchor}'></a>"
194
200
  c=coeffs
@@ -1,9 +1,7 @@
1
1
  require 'statsample/regression/multiple/baseengine'
2
2
  module Statsample
3
3
  module Regression
4
- # Module for Linear Multiple Regression Analysis.
5
- #
6
- # You can call Statsample::Regression::Multiple.listwise, Statsample::Regression::Multiple.pairwise or instance directly the engines.
4
+ # Module for OLS Multiple Regression Analysis.
7
5
  #
8
6
  # Use:.
9
7
  #
@@ -13,7 +11,7 @@ module Statsample
13
11
  # c=1000.times.collect {rand}.to_scale
14
12
  # ds={'a'=>a,'b'=>b,'c'=>c}.to_dataset
15
13
  # ds['y']=ds.collect{|row| row['a']*5+row['b']*3+row['c']*2+rand()}
16
- # lr=Statsample::Regression::Multiple.listwise(ds,'y')
14
+ # lr=Statsample::Regression.multiple(ds,'y')
17
15
  # puts lr.summary
18
16
  # Summary for regression of a,b,c over y
19
17
  # *************************************************************
@@ -42,29 +40,6 @@ module Statsample
42
40
  # -----------------------------------------------
43
41
  #
44
42
  module Multiple
45
- # Creates an object for listwise regression.
46
- # Alglib is faster, so is prefered over GSL
47
- # lr=Statsample::Regression::Multiple.listwise(ds,'y')
48
- def self.listwise(ds,y_var)
49
- if HAS_ALGIB
50
- AlglibEngine.new(ds,y_var)
51
- elsif HAS_GSL
52
- GslEngine.new(ds,y_var)
53
- else
54
- ds2=ds.dup_only_valid
55
- RubyEngine.new(ds2,y_var)
56
- end
57
- end
58
-
59
- # Creates an object for pairwise regression
60
- # For now, always retrieves a RubyEngine
61
- # lr=Statsample::Regression::Multiple.listwise(ds,'y')
62
- def self.pairwise(ds,y_var)
63
- RubyEngine.new(ds,y_var)
64
- end
65
- def self.listwise_by_exp(ds,exp)
66
- raise "Not implemented yet"
67
- end
68
43
  # Obtain r2 for regressors
69
44
  def self.r2_from_matrices(rxx,rxy)
70
45
  matrix=(rxy.transpose*rxx.inverse*rxy)
@@ -76,21 +51,19 @@ module Statsample
76
51
  0.0
77
52
  end
78
53
  def initialize(matrix,y_var, opts=Hash.new)
79
- matrix.extend Statsample::CovariateMatrix
80
- @matrix=matrix
81
- @fields=matrix.fields-y_var
82
- @y_var=y_var
83
- @q=@y_var.size
84
- @matrix_cor=matrix.correlation
85
- @matrix_cor_xx = @matrix_cor.submatrix(@fields)
86
- @matrix_cor_yy = @matrix_cor.submatrix(y_var, y_var)
87
-
88
- @sxx = @matrix.submatrix(@fields)
89
- @syy = @matrix.submatrix(y_var, y_var)
90
- @sxy = @matrix.submatrix(@fields, y_var)
91
- @syx = @sxy.t
92
-
93
-
54
+ matrix.extend Statsample::CovariateMatrix
55
+ @matrix=matrix
56
+ @fields=matrix.fields-y_var
57
+ @y_var=y_var
58
+ @q=@y_var.size
59
+ @matrix_cor=matrix.correlation
60
+ @matrix_cor_xx = @matrix_cor.submatrix(@fields)
61
+ @matrix_cor_yy = @matrix_cor.submatrix(y_var, y_var)
62
+
63
+ @sxx = @matrix.submatrix(@fields)
64
+ @syy = @matrix.submatrix(y_var, y_var)
65
+ @sxy = @matrix.submatrix(@fields, y_var)
66
+ @syx = @sxy.t
94
67
  end
95
68
 
96
69
  def r2yx
@@ -1,81 +1,96 @@
1
1
  module Statsample
2
- module Regression
3
- # Class for calculation of linear regressions with form
4
- # y = a+bx
5
- # To create a SimpleRegression object:
6
- # * <tt> SimpleRegression.new_from_vectors(vx,vy)</tt>
7
- # * <tt> SimpleRegression.new_from_gsl(gsl) </tt>
8
- #
9
- class Simple
10
- attr_accessor :a,:b,:cov00, :cov01, :covx1, :chisq, :status
11
- private_class_method :new
12
- def initialize(init_method, *argv)
13
- self.send(init_method, *argv)
14
- end
15
- def y(val_x)
16
- @a+@b*val_x
17
- end
18
- def x(val_y)
19
- (val_y-@a) / @b.to_f
20
- end
21
- # Sum of square error
22
- def sse
23
- (0...@vx.size).inject(0) {|acum,i|
24
- acum+((@vy[i]-y(@vx[i]))**2)
25
- }
26
- end
27
- def standard_error
28
- Math::sqrt(sse / (@vx.size-2).to_f)
29
- end
30
- # Sum of square regression
31
- def ssr
32
- vy_mean=@vy.mean
33
- (0...@vx.size).inject(0) {|a,i|
34
- a+((y(@vx[i])-vy_mean)**2)
35
- }
36
-
37
- end
38
- # Sum of square total
39
- def sst
40
- @vy.sum_of_squared_deviation
41
- end
42
- # Value of r
43
- def r
44
- @b * (@vx.sds / @vy.sds)
45
- end
46
- # Value of r^2
47
- def r2
48
- r**2
49
- end
50
- class << self
51
- def new_from_gsl(ar)
52
- new(:init_gsl, *ar)
53
- end
54
- def new_from_vectors(vx,vy)
55
- new(:init_vectors,vx,vy)
56
- end
57
- end
58
- def init_vectors(vx,vy)
59
- @vx,@vy=Statsample.only_valid(vx,vy)
60
- x_m=@vx.mean
61
- y_m=@vy.mean
62
- num=den=0
63
- (0...@vx.size).each {|i|
64
- num+=(@vx[i]-x_m)*(@vy[i]-y_m)
65
- den+=(@vx[i]-x_m)**2
66
- }
67
- @b=num.to_f/den
68
- @a=y_m - @b*x_m
69
- end
70
- def init_gsl(a,b,cov00, cov01, covx1, chisq, status)
71
- @a=a
72
- @b=b
73
- @cov00=cov00
74
- @cov01=cov01
75
- @covx1=covx1
76
- @chisq=chisq
77
- @status=status
78
- end
79
- end
2
+ module Regression
3
+ # Class for calculation of linear regressions with form
4
+ # y = a+bx
5
+ # To create a SimpleRegression object:
6
+ # * <tt> SimpleRegression.new_from_dataset(ds,x,y)</tt>
7
+ # * <tt> SimpleRegression.new_from_vectors(vx,vy)</tt>
8
+ # * <tt> SimpleRegression.new_from_gsl(gsl) </tt>
9
+ #
10
+ class Simple
11
+ attr_accessor :a,:b,:cov00, :cov01, :covx1, :chisq, :status
12
+
13
+ def initialize(init_method, *argv)
14
+ self.send(init_method, *argv)
15
+ end
16
+ private_class_method :new
17
+ # Obtain y value given x value
18
+ # y=a+bx
19
+
20
+ def y(val_x)
21
+ @a+@b*val_x
22
+ end
23
+ # Obtain x value given y value
24
+ # x=(y-a)/b
25
+ def x(val_y)
26
+ (val_y-@a) / @b.to_f
27
+ end
28
+ # Sum of square error
29
+ def sse
30
+ (0...@vx.size).inject(0) {|acum,i| acum+((@vy[i]-y(@vx[i]))**2)
31
+ }
32
+ end
33
+ def standard_error
34
+ Math::sqrt(sse / (@vx.size-2).to_f)
35
+ end
36
+ # Sum of square regression
37
+ def ssr
38
+ vy_mean=@vy.mean
39
+ (0...@vx.size).inject(0) {|a,i|
40
+ a+((y(@vx[i])-vy_mean)**2)
41
+ }
42
+
43
+ end
44
+ # Sum of square total
45
+ def sst
46
+ @vy.sum_of_squared_deviation
47
+ end
48
+ # Value of r
49
+ def r
50
+ @b * (@vx.sds / @vy.sds)
51
+ end
52
+ # Value of r^2
53
+ def r2
54
+ r**2
55
+ end
56
+ class << self
57
+ # Create a regression object giving an array with following parameters:
58
+ # <tt>a,b,cov00, cov01, covx1, chisq, status</tt>
59
+ # Useful to obtain x and y values with a and b values.
60
+ def new_from_gsl(ar)
61
+ new(:init_gsl, *ar)
62
+ end
63
+ # Create a simple regression using two vectors
64
+ def new_from_vectors(vx,vy)
65
+ new(:init_vectors,vx,vy)
66
+ end
67
+ # Create a simple regression using a dataset and two vector names.
68
+ def new_from_dataset(ds,x,y)
69
+ new(:init_vectors,ds[x],ds[y])
70
+ end
71
+ end
72
+ def init_vectors(vx,vy)
73
+ @vx,@vy=Statsample.only_valid(vx,vy)
74
+ x_m=@vx.mean
75
+ y_m=@vy.mean
76
+ num=den=0
77
+ (0...@vx.size).each {|i|
78
+ num+=(@vx[i]-x_m)*(@vy[i]-y_m)
79
+ den+=(@vx[i]-x_m)**2
80
+ }
81
+ @b=num.to_f/den
82
+ @a=y_m - @b*x_m
83
+ end
84
+ def init_gsl(a,b,cov00, cov01, covx1, chisq, status)
85
+ @a=a
86
+ @b=b
87
+ @cov00=cov00
88
+ @cov01=cov01
89
+ @covx1=covx1
90
+ @chisq=chisq
91
+ @status=status
92
+ end
93
+ private :init_vectors, :init_gsl
80
94
  end
95
+ end
81
96
  end
@@ -2,7 +2,6 @@ require 'statsample/regression/simple'
2
2
  require 'statsample/regression/multiple'
3
3
 
4
4
  require 'statsample/regression/multiple/matrixengine'
5
- require 'statsample/regression/multiple/alglibengine'
6
5
  require 'statsample/regression/multiple/rubyengine'
7
6
  require 'statsample/regression/multiple/gslengine'
8
7
 
@@ -11,7 +10,80 @@ require 'statsample/regression/binomial/logit'
11
10
  require 'statsample/regression/binomial/probit'
12
11
 
13
12
  module Statsample
14
- # Module for regression procedures.
13
+ # = Module for regression procedures.
14
+ # Use the methods of this module to generate
15
+ # analysis.
16
+ # If you need more control, you can
17
+ # directly create and control the objects that compute
18
+ # the regressions.
19
+ #
20
+ # * Simple Regression : Statsample::Regression::Simple
21
+ # * Multiple Regression: Statsample::Regression::Multiple
22
+ # * Logit Regression: Statsample::Regression::Binomial::Logit
23
+ # * Probit Regression: Statsample::Regression::Binomial::Probit
15
24
  module Regression
25
+ # Create a Statsample::Regression::Simple object, for simple regression
26
+ # * x: independent Vector
27
+ # * y: dependent Vector
28
+ # <b>Usage:</b>
29
+ # x=100.times.collect {|i| rand(100)}.to_scale
30
+ # y=100.times.collect {|i| 2+x[i]*2+rand()}.to_scale
31
+ # sr=Statsample::Regression.simple(x,y)
32
+ # sr.a
33
+ # => 2.51763295177808
34
+ # sr.b
35
+ # => 1.99973746599856
36
+ # sr.r
37
+ # => 0.999987881153254
38
+
39
+ def self.simple(x,y)
40
+ Statsample::Regression::Simple.new_from_vectors(x,y)
41
+ end
42
+ # Create a Binomial::Logit object, for logit regression.
43
+ # * ds:: Dataset
44
+ # * y:: Name of dependent vector
45
+ # <b>Usage</b>
46
+ # dataset=Statsample::CSV.read("data.csv")
47
+ # lr=Statsample::Regression.logit(dataset,'y')
48
+ #
49
+ def self.logit(ds,y_var)
50
+ Statsample::Regression::Binomial::Logit.new(ds,y_var)
51
+ end
52
+ # Create a Binomial::Probit object, for probit regression
53
+ # * ds:: Dataset
54
+ # * y:: Name of dependent vector
55
+ # <b>Usage</b>
56
+ # dataset=Statsample::CSV.read("data.csv")
57
+ # lr=Statsample::Regression.probit(dataset,'y')
58
+ #
59
+
60
+ def self.probit(ds,y_var)
61
+ Statsample::Regression::Binomial::Probit.new(ds,y_var)
62
+ end
63
+
64
+
65
+ # Creates one of the Statsample::Regression::Multiple object,
66
+ # for OLS multiple regression.
67
+ # Parameters:
68
+ # * ds: Dataset.
69
+ # * y: Name of dependent variable.
70
+ # * missing_data: Could be
71
+ # * :listwise: delete cases with one or more missing values (default).
72
+ # * :pairwise: uses correlation matrix. Use with caution.
73
+ #
74
+ # <b>Usage:</b>
75
+ # lr=Statsample::Regression::multiple(ds,'y')
76
# Build a multiple-regression engine for +ds+ with +y_var+ as the
# dependent variable.
#
# * ds:: Dataset handed to the engine.
# * y_var:: Name of the dependent variable.
# * missing_data:: +:pairwise+ selects the RubyEngine on the dataset as
#   given; any other value (default +:listwise+) selects the GslEngine
#   when Statsample.has_gsl? is true, otherwise the RubyEngine on
#   <tt>ds.dup_only_valid</tt>.
#
# Returns the newly created engine object.
def self.multiple(ds,y_var, missing_data=:listwise)
  if missing_data==:pairwise
    # The engine classes live under Statsample::Regression::Multiple; a bare
    # RubyEngine cannot be resolved from this method's lexical scope
    # (Statsample::Regression) and would raise NameError.
    Statsample::Regression::Multiple::RubyEngine.new(ds,y_var)
  elsif Statsample.has_gsl?
    Statsample::Regression::Multiple::GslEngine.new(ds,y_var)
  else
    # No GSL available: hand the Ruby engine a copy produced by
    # dup_only_valid instead of the original dataset.
    ds2=ds.dup_only_valid
    Statsample::Regression::Multiple::RubyEngine.new(ds2,y_var)
  end
end
16
88
  end
17
89
  end