statsample 0.6.5 → 0.6.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. data/History.txt +15 -0
  2. data/Manifest.txt +6 -0
  3. data/README.txt +30 -12
  4. data/Rakefile +91 -0
  5. data/demo/levene.rb +9 -0
  6. data/demo/multiple_regression.rb +1 -7
  7. data/demo/polychoric.rb +1 -0
  8. data/demo/principal_axis.rb +8 -0
  9. data/lib/distribution/f.rb +22 -22
  10. data/lib/spss.rb +99 -99
  11. data/lib/statsample/bivariate/polychoric.rb +32 -22
  12. data/lib/statsample/bivariate/tetrachoric.rb +212 -207
  13. data/lib/statsample/bivariate.rb +6 -6
  14. data/lib/statsample/codification.rb +65 -65
  15. data/lib/statsample/combination.rb +60 -59
  16. data/lib/statsample/converter/csv19.rb +12 -12
  17. data/lib/statsample/converters.rb +1 -1
  18. data/lib/statsample/dataset.rb +93 -36
  19. data/lib/statsample/dominanceanalysis/bootstrap.rb +66 -3
  20. data/lib/statsample/dominanceanalysis.rb +5 -6
  21. data/lib/statsample/factor/pca.rb +41 -11
  22. data/lib/statsample/factor/principalaxis.rb +105 -29
  23. data/lib/statsample/factor/rotation.rb +20 -3
  24. data/lib/statsample/factor.rb +1 -1
  25. data/lib/statsample/graph/gdchart.rb +13 -13
  26. data/lib/statsample/graph/svggraph.rb +166 -167
  27. data/lib/statsample/matrix.rb +22 -12
  28. data/lib/statsample/mle/logit.rb +3 -2
  29. data/lib/statsample/mle/probit.rb +7 -5
  30. data/lib/statsample/mle.rb +4 -2
  31. data/lib/statsample/multiset.rb +125 -124
  32. data/lib/statsample/permutation.rb +2 -1
  33. data/lib/statsample/regression/binomial/logit.rb +4 -3
  34. data/lib/statsample/regression/binomial/probit.rb +2 -1
  35. data/lib/statsample/regression/binomial.rb +62 -81
  36. data/lib/statsample/regression/multiple/baseengine.rb +1 -1
  37. data/lib/statsample/regression/multiple/gslengine.rb +1 -1
  38. data/lib/statsample/regression/multiple/matrixengine.rb +12 -6
  39. data/lib/statsample/regression/multiple.rb +15 -42
  40. data/lib/statsample/regression/simple.rb +93 -78
  41. data/lib/statsample/regression.rb +74 -2
  42. data/lib/statsample/reliability.rb +117 -120
  43. data/lib/statsample/srs.rb +156 -153
  44. data/lib/statsample/test/levene.rb +90 -0
  45. data/lib/statsample/test/umannwhitney.rb +25 -9
  46. data/lib/statsample/test.rb +2 -0
  47. data/lib/statsample/vector.rb +388 -413
  48. data/lib/statsample.rb +74 -30
  49. data/po/es/statsample.mo +0 -0
  50. data/test/test_bivariate.rb +5 -4
  51. data/test/test_combination.rb +1 -1
  52. data/test/test_dataset.rb +2 -2
  53. data/test/test_factor.rb +53 -6
  54. data/test/test_gsl.rb +1 -1
  55. data/test/test_mle.rb +1 -1
  56. data/test/test_regression.rb +18 -33
  57. data/test/test_statistics.rb +15 -33
  58. data/test/test_stest.rb +35 -0
  59. data/test/test_svg_graph.rb +2 -2
  60. data/test/test_vector.rb +331 -333
  61. metadata +38 -11
@@ -1,4 +1,4 @@
1
- if HAS_GSL
1
+ if Statsample.has_gsl?
2
2
  module Statsample
3
3
  module Regression
4
4
  module Multiple
@@ -101,7 +101,7 @@ class MatrixEngine < BaseEngine
101
101
  # Get R^2 for the regression
102
102
  # Equal to
103
103
  # * 1-(|R| / |R_x|) or
104
- # * Sum(b_i*r_yi)
104
+ # * Sum(b_i*r_yi) <- used
105
105
  def r2
106
106
  @n_predictors.times.inject(0) {|ac,i| ac+@coeffs_stan[i]* @matrix_y[i,0]}
107
107
  end
@@ -113,13 +113,16 @@ class MatrixEngine < BaseEngine
113
113
  c=coeffs
114
114
  @y_mean - @fields.inject(0){|a,k| a + (c[k] * @x_mean[k])}
115
115
  end
116
+ # Hash of b or raw coefficients
116
117
  def coeffs
117
118
  assign_names(@coeffs)
118
119
  end
120
+ # Hash of beta or standardized coefficients
121
+
119
122
  def standarized_coeffs
120
123
  assign_names(@coeffs_stan)
121
124
  end
122
-
125
+ # Total sum of squares
123
126
  def sst
124
127
  @y_sd**2*(cases-1.0)
125
128
  end
@@ -134,9 +137,11 @@ class MatrixEngine < BaseEngine
134
137
  end
135
138
 
136
139
  # Tolerance for a given variable
137
- # defined as (1-r2) of regression of other independent variables
140
+ # defined as (1-R^2) of regression of other independent variables
138
141
  # over the selected
139
- # http://talkstats.com/showthread.php?t=5056
142
+ # Reference:
143
+ #
144
+ # * http://talkstats.com/showthread.php?t=5056
140
145
  def tolerance(var)
141
146
  lr=Statsample::Regression::Multiple::MatrixEngine.new(@matrix_x, var)
142
147
  1-lr.r2
@@ -146,7 +151,8 @@ class MatrixEngine < BaseEngine
146
151
  # * Tolerance of the coefficients: Higher tolerances imply higher error
147
152
  # * Higher r2 implies lower error
148
153
 
149
- # Reference: Cohen et al. (2003). Applied Multiple Reggression / Correlation Analysis for the Behavioral Sciences
154
+ # Reference:
155
+ # * Cohen et al. (2003). Applied Multiple Regression / Correlation Analysis for the Behavioral Sciences
150
156
  #
151
157
  def coeffs_se
152
158
  out={}
@@ -188,7 +194,7 @@ class MatrixEngine < BaseEngine
188
194
  matrix.collect {|i| Math::sqrt(i) if i>0 }[0,0]
189
195
  end
190
196
 
191
- def to_reportbuilder(generator)
197
+ def to_reportbuilder(generator) # :nodoc:
192
198
  anchor=generator.add_toc_entry(_("Multiple Regression: ")+@name)
193
199
  generator.add_html "<div class='multiple-regression'>#{@name}<a name='#{anchor}'></a>"
194
200
  c=coeffs
@@ -1,9 +1,7 @@
1
1
  require 'statsample/regression/multiple/baseengine'
2
2
  module Statsample
3
3
  module Regression
4
- # Module for Linear Multiple Regression Analysis.
5
- #
6
- # You can call Statsample::Regression::Multiple.listwise, Statsample::Regression::Multiple.pairwise or instance directly the engines.
4
+ # Module for OLS Multiple Regression Analysis.
7
5
  #
8
6
  # Use:
9
7
  #
@@ -13,7 +11,7 @@ module Statsample
13
11
  # c=1000.times.collect {rand}.to_scale
14
12
  # ds={'a'=>a,'b'=>b,'c'=>c}.to_dataset
15
13
  # ds['y']=ds.collect{|row| row['a']*5+row['b']*3+row['c']*2+rand()}
16
- # lr=Statsample::Regression::Multiple.listwise(ds,'y')
14
+ # lr=Statsample::Regression.multiple(ds,'y')
17
15
  # puts lr.summary
18
16
  # Summary for regression of a,b,c over y
19
17
  # *************************************************************
@@ -42,29 +40,6 @@ module Statsample
42
40
  # -----------------------------------------------
43
41
  #
44
42
  module Multiple
45
- # Creates an object for listwise regression.
46
- # Alglib is faster, so is prefered over GSL
47
- # lr=Statsample::Regression::Multiple.listwise(ds,'y')
48
- def self.listwise(ds,y_var)
49
- if HAS_ALGIB
50
- AlglibEngine.new(ds,y_var)
51
- elsif HAS_GSL
52
- GslEngine.new(ds,y_var)
53
- else
54
- ds2=ds.dup_only_valid
55
- RubyEngine.new(ds2,y_var)
56
- end
57
- end
58
-
59
- # Creates an object for pairwise regression
60
- # For now, always retrieves a RubyEngine
61
- # lr=Statsample::Regression::Multiple.listwise(ds,'y')
62
- def self.pairwise(ds,y_var)
63
- RubyEngine.new(ds,y_var)
64
- end
65
- def self.listwise_by_exp(ds,exp)
66
- raise "Not implemented yet"
67
- end
68
43
  # Obtain r2 for regressors
69
44
  def self.r2_from_matrices(rxx,rxy)
70
45
  matrix=(rxy.transpose*rxx.inverse*rxy)
@@ -76,21 +51,19 @@ module Statsample
76
51
  0.0
77
52
  end
78
53
  def initialize(matrix,y_var, opts=Hash.new)
79
- matrix.extend Statsample::CovariateMatrix
80
- @matrix=matrix
81
- @fields=matrix.fields-y_var
82
- @y_var=y_var
83
- @q=@y_var.size
84
- @matrix_cor=matrix.correlation
85
- @matrix_cor_xx = @matrix_cor.submatrix(@fields)
86
- @matrix_cor_yy = @matrix_cor.submatrix(y_var, y_var)
87
-
88
- @sxx = @matrix.submatrix(@fields)
89
- @syy = @matrix.submatrix(y_var, y_var)
90
- @sxy = @matrix.submatrix(@fields, y_var)
91
- @syx = @sxy.t
92
-
93
-
54
+ matrix.extend Statsample::CovariateMatrix
55
+ @matrix=matrix
56
+ @fields=matrix.fields-y_var
57
+ @y_var=y_var
58
+ @q=@y_var.size
59
+ @matrix_cor=matrix.correlation
60
+ @matrix_cor_xx = @matrix_cor.submatrix(@fields)
61
+ @matrix_cor_yy = @matrix_cor.submatrix(y_var, y_var)
62
+
63
+ @sxx = @matrix.submatrix(@fields)
64
+ @syy = @matrix.submatrix(y_var, y_var)
65
+ @sxy = @matrix.submatrix(@fields, y_var)
66
+ @syx = @sxy.t
94
67
  end
95
68
 
96
69
  def r2yx
@@ -1,81 +1,96 @@
1
1
  module Statsample
2
- module Regression
3
- # Class for calculation of linear regressions with form
4
- # y = a+bx
5
- # To create a SimpleRegression object:
6
- # * <tt> SimpleRegression.new_from_vectors(vx,vy)</tt>
7
- # * <tt> SimpleRegression.new_from_gsl(gsl) </tt>
8
- #
9
- class Simple
10
- attr_accessor :a,:b,:cov00, :cov01, :covx1, :chisq, :status
11
- private_class_method :new
12
- def initialize(init_method, *argv)
13
- self.send(init_method, *argv)
14
- end
15
- def y(val_x)
16
- @a+@b*val_x
17
- end
18
- def x(val_y)
19
- (val_y-@a) / @b.to_f
20
- end
21
- # Sum of square error
22
- def sse
23
- (0...@vx.size).inject(0) {|acum,i|
24
- acum+((@vy[i]-y(@vx[i]))**2)
25
- }
26
- end
27
- def standard_error
28
- Math::sqrt(sse / (@vx.size-2).to_f)
29
- end
30
- # Sum of square regression
31
- def ssr
32
- vy_mean=@vy.mean
33
- (0...@vx.size).inject(0) {|a,i|
34
- a+((y(@vx[i])-vy_mean)**2)
35
- }
36
-
37
- end
38
- # Sum of square total
39
- def sst
40
- @vy.sum_of_squared_deviation
41
- end
42
- # Value of r
43
- def r
44
- @b * (@vx.sds / @vy.sds)
45
- end
46
- # Value of r^2
47
- def r2
48
- r**2
49
- end
50
- class << self
51
- def new_from_gsl(ar)
52
- new(:init_gsl, *ar)
53
- end
54
- def new_from_vectors(vx,vy)
55
- new(:init_vectors,vx,vy)
56
- end
57
- end
58
- def init_vectors(vx,vy)
59
- @vx,@vy=Statsample.only_valid(vx,vy)
60
- x_m=@vx.mean
61
- y_m=@vy.mean
62
- num=den=0
63
- (0...@vx.size).each {|i|
64
- num+=(@vx[i]-x_m)*(@vy[i]-y_m)
65
- den+=(@vx[i]-x_m)**2
66
- }
67
- @b=num.to_f/den
68
- @a=y_m - @b*x_m
69
- end
70
- def init_gsl(a,b,cov00, cov01, covx1, chisq, status)
71
- @a=a
72
- @b=b
73
- @cov00=cov00
74
- @cov01=cov01
75
- @covx1=covx1
76
- @chisq=chisq
77
- @status=status
78
- end
79
- end
2
+ module Regression
3
+ # Class for calculation of linear regressions with form
4
+ # y = a+bx
5
+ # To create a SimpleRegression object:
6
+ # * <tt> SimpleRegression.new_from_dataset(ds,x,y)</tt>
7
+ # * <tt> SimpleRegression.new_from_vectors(vx,vy)</tt>
8
+ # * <tt> SimpleRegression.new_from_gsl(gsl) </tt>
9
+ #
10
+ class Simple
11
+ attr_accessor :a,:b,:cov00, :cov01, :covx1, :chisq, :status
12
+
13
+ def initialize(init_method, *argv)
14
+ self.send(init_method, *argv)
15
+ end
16
+ private_class_method :new
17
+ # Obtain y value given x value
18
+ # y=a+bx
19
+
20
+ def y(val_x)
21
+ @a+@b*val_x
22
+ end
23
+ # Obtain x value given y value
24
+ # x=(y-a)/b
25
+ def x(val_y)
26
+ (val_y-@a) / @b.to_f
27
+ end
28
+ # Sum of square error
29
+ def sse
30
+ (0...@vx.size).inject(0) {|acum,i| acum+((@vy[i]-y(@vx[i]))**2)
31
+ }
32
+ end
33
+ def standard_error
34
+ Math::sqrt(sse / (@vx.size-2).to_f)
35
+ end
36
+ # Sum of square regression
37
+ def ssr
38
+ vy_mean=@vy.mean
39
+ (0...@vx.size).inject(0) {|a,i|
40
+ a+((y(@vx[i])-vy_mean)**2)
41
+ }
42
+
43
+ end
44
+ # Sum of square total
45
+ def sst
46
+ @vy.sum_of_squared_deviation
47
+ end
48
+ # Value of r
49
+ def r
50
+ @b * (@vx.sds / @vy.sds)
51
+ end
52
+ # Value of r^2
53
+ def r2
54
+ r**2
55
+ end
56
+ class << self
57
+ # Create a regression object giving an array with following parameters:
58
+ # <tt>a,b,cov00, cov01, covx1, chisq, status</tt>
59
+ # Useful to obtain x and y values with a and b values.
60
+ def new_from_gsl(ar)
61
+ new(:init_gsl, *ar)
62
+ end
63
+ # Create a simple regression using two vectors
64
+ def new_from_vectors(vx,vy)
65
+ new(:init_vectors,vx,vy)
66
+ end
67
+ # Create a simple regression using a dataset and two vector names.
68
+ def new_from_dataset(ds,x,y)
69
+ new(:init_vectors,ds[x],ds[y])
70
+ end
71
+ end
72
+ def init_vectors(vx,vy)
73
+ @vx,@vy=Statsample.only_valid(vx,vy)
74
+ x_m=@vx.mean
75
+ y_m=@vy.mean
76
+ num=den=0
77
+ (0...@vx.size).each {|i|
78
+ num+=(@vx[i]-x_m)*(@vy[i]-y_m)
79
+ den+=(@vx[i]-x_m)**2
80
+ }
81
+ @b=num.to_f/den
82
+ @a=y_m - @b*x_m
83
+ end
84
+ def init_gsl(a,b,cov00, cov01, covx1, chisq, status)
85
+ @a=a
86
+ @b=b
87
+ @cov00=cov00
88
+ @cov01=cov01
89
+ @covx1=covx1
90
+ @chisq=chisq
91
+ @status=status
92
+ end
93
+ private :init_vectors, :init_gsl
80
94
  end
95
+ end
81
96
  end
@@ -2,7 +2,6 @@ require 'statsample/regression/simple'
2
2
  require 'statsample/regression/multiple'
3
3
 
4
4
  require 'statsample/regression/multiple/matrixengine'
5
- require 'statsample/regression/multiple/alglibengine'
6
5
  require 'statsample/regression/multiple/rubyengine'
7
6
  require 'statsample/regression/multiple/gslengine'
8
7
 
@@ -11,7 +10,80 @@ require 'statsample/regression/binomial/logit'
11
10
  require 'statsample/regression/binomial/probit'
12
11
 
13
12
  module Statsample
14
- # Module for regression procedures.
13
+ # = Module for regression procedures.
14
+ # Use the methods on this module to generate
15
+ # analysis.
16
+ # If you need more control, you can
17
+ # directly create and control the objects that compute
18
+ # the regressions.
19
+ #
20
+ # * Simple Regression : Statsample::Regression::Simple
21
+ # * Multiple Regression: Statsample::Regression::Multiple
22
+ # * Logit Regression: Statsample::Regression::Binomial::Logit
23
+ # * Probit Regression: Statsample::Regression::Binomial::Probit
15
24
  module Regression
25
+ # Create a Statsample::Regression::Simple object, for simple regression
26
+ # * x: independent Vector
27
+ # * y: dependent Vector
28
+ # <b>Usage:</b>
29
+ # x=100.times.collect {|i| rand(100)}.to_scale
30
+ # y=100.times.collect {|i| 2+x[i]*2+rand()}.to_scale
31
+ # sr=Statsample::Regression.simple(x,y)
32
+ # sr.a
33
+ # => 2.51763295177808
34
+ # sr.b
35
+ # => 1.99973746599856
36
+ # sr.r
37
+ # => 0.999987881153254
38
+
39
+ def self.simple(x,y)
40
+ Statsample::Regression::Simple.new_from_vectors(x,y)
41
+ end
42
+ # Create a Binomial::Logit object, for logit regression.
43
+ # * ds:: Dataset
44
+ # * y:: Name of dependent vector
45
+ # <b>Usage</b>
46
+ # dataset=Statsample::CSV.read("data.csv")
47
+ # lr=Statsample::Regression.logit(dataset,'y')
48
+ #
49
+ def self.logit(ds,y_var)
50
+ Statsample::Regression::Binomial::Logit.new(ds,y_var)
51
+ end
52
+ # Create a Binomial::Probit object, for probit regression
53
+ # * ds:: Dataset
54
+ # * y:: Name of dependent vector
55
+ # <b>Usage</b>
56
+ # dataset=Statsample::CSV.read("data.csv")
57
+ # lr=Statsample::Regression.probit(dataset,'y')
58
+ #
59
+
60
+ def self.probit(ds,y_var)
61
+ Statsample::Regression::Binomial::Probit.new(ds,y_var)
62
+ end
63
+
64
+
65
+ # Creates one of the Statsample::Regression::Multiple object,
66
+ # for OLS multiple regression.
67
+ # Parameters:
68
+ # * ds: Dataset.
69
+ # * y: Name of dependent variable.
70
+ # * missing_data: Could be
71
+ # * :listwise: deletes cases with one or more missing values (default).
72
+ # * :pairwise: uses correlation matrix. Use with caution.
73
+ #
74
+ # <b>Usage:</b>
75
+ # lr=Statsample::Regression::multiple(ds,'y')
76
+ def self.multiple(ds,y_var, missing_data=:listwise)
77
+ if missing_data==:pairwise
78
+ RubyEngine.new(ds,y_var)
79
+ else
80
+ if Statsample.has_gsl?
81
+ Statsample::Regression::Multiple::GslEngine.new(ds,y_var)
82
+ else
83
+ ds2=ds.dup_only_valid
84
+ Statsample::Regression::Multiple::RubyEngine.new(ds2,y_var)
85
+ end
86
+ end
87
+ end
16
88
  end
17
89
  end