statsample 0.6.5 → 0.6.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61)
  1. data/History.txt +15 -0
  2. data/Manifest.txt +6 -0
  3. data/README.txt +30 -12
  4. data/Rakefile +91 -0
  5. data/demo/levene.rb +9 -0
  6. data/demo/multiple_regression.rb +1 -7
  7. data/demo/polychoric.rb +1 -0
  8. data/demo/principal_axis.rb +8 -0
  9. data/lib/distribution/f.rb +22 -22
  10. data/lib/spss.rb +99 -99
  11. data/lib/statsample/bivariate/polychoric.rb +32 -22
  12. data/lib/statsample/bivariate/tetrachoric.rb +212 -207
  13. data/lib/statsample/bivariate.rb +6 -6
  14. data/lib/statsample/codification.rb +65 -65
  15. data/lib/statsample/combination.rb +60 -59
  16. data/lib/statsample/converter/csv19.rb +12 -12
  17. data/lib/statsample/converters.rb +1 -1
  18. data/lib/statsample/dataset.rb +93 -36
  19. data/lib/statsample/dominanceanalysis/bootstrap.rb +66 -3
  20. data/lib/statsample/dominanceanalysis.rb +5 -6
  21. data/lib/statsample/factor/pca.rb +41 -11
  22. data/lib/statsample/factor/principalaxis.rb +105 -29
  23. data/lib/statsample/factor/rotation.rb +20 -3
  24. data/lib/statsample/factor.rb +1 -1
  25. data/lib/statsample/graph/gdchart.rb +13 -13
  26. data/lib/statsample/graph/svggraph.rb +166 -167
  27. data/lib/statsample/matrix.rb +22 -12
  28. data/lib/statsample/mle/logit.rb +3 -2
  29. data/lib/statsample/mle/probit.rb +7 -5
  30. data/lib/statsample/mle.rb +4 -2
  31. data/lib/statsample/multiset.rb +125 -124
  32. data/lib/statsample/permutation.rb +2 -1
  33. data/lib/statsample/regression/binomial/logit.rb +4 -3
  34. data/lib/statsample/regression/binomial/probit.rb +2 -1
  35. data/lib/statsample/regression/binomial.rb +62 -81
  36. data/lib/statsample/regression/multiple/baseengine.rb +1 -1
  37. data/lib/statsample/regression/multiple/gslengine.rb +1 -1
  38. data/lib/statsample/regression/multiple/matrixengine.rb +12 -6
  39. data/lib/statsample/regression/multiple.rb +15 -42
  40. data/lib/statsample/regression/simple.rb +93 -78
  41. data/lib/statsample/regression.rb +74 -2
  42. data/lib/statsample/reliability.rb +117 -120
  43. data/lib/statsample/srs.rb +156 -153
  44. data/lib/statsample/test/levene.rb +90 -0
  45. data/lib/statsample/test/umannwhitney.rb +25 -9
  46. data/lib/statsample/test.rb +2 -0
  47. data/lib/statsample/vector.rb +388 -413
  48. data/lib/statsample.rb +74 -30
  49. data/po/es/statsample.mo +0 -0
  50. data/test/test_bivariate.rb +5 -4
  51. data/test/test_combination.rb +1 -1
  52. data/test/test_dataset.rb +2 -2
  53. data/test/test_factor.rb +53 -6
  54. data/test/test_gsl.rb +1 -1
  55. data/test/test_mle.rb +1 -1
  56. data/test/test_regression.rb +18 -33
  57. data/test/test_statistics.rb +15 -33
  58. data/test/test_stest.rb +35 -0
  59. data/test/test_svg_graph.rb +2 -2
  60. data/test/test_vector.rb +331 -333
  61. metadata +38 -11
@@ -92,33 +92,43 @@ module GSL
92
92
  end
93
93
 
94
94
  module Statsample
95
- # Method for variance/covariance and correlation matrices
95
+ # Module to add method for variance/covariance and correlation matrices
96
+ # == Usage
97
+ # matrix=Matrix[[1,2],[2,3]]
98
+ # matrix.extend CovariateMatrix
99
+ #
96
100
  module CovariateMatrix
101
+ # Gives a nice text summary of the matrix
97
102
  def summary
98
103
  rp=ReportBuilder.new()
99
104
  rp.add(self)
100
105
  rp.to_text
101
106
  end
102
- def type=(v)
103
- @type=v
104
- end
107
+ # Get type of covariate matrix. Could be :covariance or :correlation
105
108
  def type
106
- if row_size.times.find {|i| self[i,i]!=1.0}
107
- :covariance
109
+ if row_size==column_size
110
+ if row_size.times.find {|i| self[i,i]!=1.0}
111
+ :covariance
112
+ else
113
+ :correlation
114
+ end
108
115
  else
109
- :correlation
116
+ @type
110
117
  end
111
118
 
112
119
  end
120
+ def type=(t)
121
+ @type=t
122
+ end
113
123
  def correlation
114
124
  if(type==:covariance)
115
125
  matrix=Matrix.rows(row_size.times.collect { |i|
116
126
  column_size.times.collect { |j|
117
- if i==j
118
- 1.0
119
- else
120
- self[i,j].quo(Math::sqrt(self[i,i])*Math::sqrt(self[j,j]))
121
- end
127
+ if i==j
128
+ 1.0
129
+ else
130
+ self[i,j].quo(Math::sqrt(self[i,i])*Math::sqrt(self[j,j]))
131
+ end
122
132
  }
123
133
  })
124
134
  matrix.extend CovariateMatrix
@@ -1,12 +1,13 @@
1
1
  module Statsample
2
2
  module MLE
3
- # Logit MLE estimation.
3
+ # Logit MLE estimation.
4
+ # See Statsample::Regression for methods to generate a logit regression.
4
5
  # Usage:
5
6
  #
6
7
  # mle=Statsample::MLE::Logit.new
7
8
  # mle.newton_raphson(x,y)
8
9
  # beta=mle.parameters
9
- # likehood=mle.likehood(x,y,beta)
10
+ # likehood=mle.likehood(x, y, beta)
10
11
  # iterations=mle.iterations
11
12
  #
12
13
  class Logit < BaseMLE
@@ -1,7 +1,9 @@
1
1
  module Statsample
2
2
  module MLE
3
3
  # Probit MLE estimation.
4
- # Usage:
4
+ # See Statsample::Regression for methods to generate a probit regression.
5
+ #
6
+ # == Usage:
5
7
  #
6
8
  # mle=Statsample::MLE::Probit.new
7
9
  # mle.newton_raphson(x,y)
@@ -10,7 +12,7 @@ module Statsample
10
12
  # iterations=mle.iterations
11
13
  class Probit < BaseMLE
12
14
  # F(B'Xi)
13
- if HAS_GSL
15
+ if Statsample.has_gsl?
14
16
  # F(B'Xi)
15
17
  def f(b,x)
16
18
  p_bx=(x*b)[0,0]
@@ -67,7 +69,7 @@ module Statsample
67
69
  raise "x.columns!=p.rows" if x.column_size!=b.row_size
68
70
  n = x.row_size
69
71
  k = x.column_size
70
- if HAS_GSL
72
+ if Statsample.has_gsl?
71
73
  sum=GSL::Matrix.zeros(k)
72
74
  else
73
75
  sum=Matrix.zero(k)
@@ -76,12 +78,12 @@ module Statsample
76
78
  xi=Matrix.rows([x.row(i).to_a])
77
79
  fbx=f(b,xi)
78
80
  val=((ff(b,xi)**2) / (fbx*(1.0-fbx)))*xi.t*xi
79
- if HAS_GSL
81
+ if Statsample.has_gsl?
80
82
  val=val.to_gsl
81
83
  end
82
84
  sum-=val
83
85
  end
84
- if HAS_GSL
86
+ if Statsample.has_gsl?
85
87
  sum=sum.to_matrix
86
88
  end
87
89
  sum
@@ -1,7 +1,9 @@
1
1
  module Statsample
2
- # Module for MLE calculations.
2
+ # Module for generic MLE calculations.
3
3
  # Use subclass of BaseMLE for specific MLE model estimation.
4
- # Usage:
4
+ # You should visit Statsample::Regression for methods to perform fast
5
+ # regression analysis.
6
+ # == Usage:
5
7
  #
6
8
  # mle=Statsample::MLE::Probit.new
7
9
  # mle.newton_raphson(x,y)
@@ -24,53 +24,53 @@ module Statsample
24
24
  @datasets.size
25
25
  end
26
26
  def add_dataset(key,ds)
27
- if(ds.fields!=@fields)
28
- raise ArgumentError, "Dataset(#{ds.fields.to_s})must have the same fields of the Multiset(#{@fields})"
29
- else
30
- @datasets[key]=ds
31
- end
27
+ if(ds.fields!=@fields)
28
+ raise ArgumentError, "Dataset(#{ds.fields.to_s})must have the same fields of the Multiset(#{@fields})"
29
+ else
30
+ @datasets[key]=ds
31
+ end
32
32
  end
33
33
  def sum_field(field)
34
- @datasets.inject(0) {|a,da|
35
- stratum_name=da[0]
36
- vector=da[1][field]
37
- val=yield stratum_name,vector
38
- a+val
39
- }
34
+ @datasets.inject(0) {|a,da|
35
+ stratum_name=da[0]
36
+ vector=da[1][field]
37
+ val=yield stratum_name,vector
38
+ a+val
39
+ }
40
40
  end
41
41
  def collect_vector(field)
42
- @datasets.collect {|k,v|
43
- yield k, v[field]
44
- }
42
+ @datasets.collect {|k,v|
43
+ yield k, v[field]
44
+ }
45
45
  end
46
46
  def[](i)
47
- @datasets[i]
48
- end
47
+ @datasets[i]
49
48
  end
50
- class StratifiedSample
51
- class << self
52
- # mean for an array of vectors
53
- def mean(*vectors)
54
- n_total=0
55
- means=vectors.inject(0){|a,v|
56
- n_total+=v.size
57
- a+v.sum
58
- }
59
- means.to_f/n_total
60
- end
61
-
49
+ end
50
+ class StratifiedSample
51
+ class << self
52
+ # mean for an array of vectors
53
+ def mean(*vectors)
54
+ n_total=0
55
+ means=vectors.inject(0){|a,v|
56
+ n_total+=v.size
57
+ a+v.sum
58
+ }
59
+ means.to_f/n_total
60
+ end
61
+
62
62
  def standard_error_ksd_wr(es)
63
- n_total=0
64
- sum=es.inject(0){|a,h|
65
- n_total+=h['N']
66
- a+((h['N']**2 * h['s']**2) / h['n'].to_f)
67
- }
68
- (1.to_f / n_total)*Math::sqrt(sum)
63
+ n_total=0
64
+ sum=es.inject(0){|a,h|
65
+ n_total+=h['N']
66
+ a+((h['N']**2 * h['s']**2) / h['n'].to_f)
67
+ }
68
+ (1.to_f / n_total)*Math::sqrt(sum)
69
69
  end
70
70
 
71
71
 
72
72
  def variance_ksd_wr(es)
73
- standard_error_ksd_wr(es)**2
73
+ standard_error_ksd_wr(es)**2
74
74
  end
75
75
  def calculate_n_total(es)
76
76
  es.inject(0) {|a,h| a+h['N'] }
@@ -85,7 +85,7 @@ module Statsample
85
85
  }
86
86
  end
87
87
  def standard_error_ksd_wor(es)
88
- Math::sqrt(variance_ksd_wor(es))
88
+ Math::sqrt(variance_ksd_wor(es))
89
89
  end
90
90
 
91
91
 
@@ -101,26 +101,26 @@ module Statsample
101
101
 
102
102
 
103
103
  def standard_error_esd_wor(es)
104
- Math::sqrt(variance_ksd_wor(es))
104
+ Math::sqrt(variance_ksd_wor(es))
105
105
  end
106
106
  # Based on http://stattrek.com/Lesson6/STRAnalysis.aspx
107
107
  def variance_esd_wr(es)
108
108
  n_total=calculate_n_total(es)
109
109
  sum=es.inject(0){|a,h|
110
- val= ((h['s']**2 * h['N']**2) / h['n'].to_f)
111
- a+val
110
+ val= ((h['s']**2 * h['N']**2) / h['n'].to_f)
111
+ a+val
112
112
  }
113
113
  (1.0/(n_total**2))*sum
114
114
  end
115
115
  def standard_error_esd_wr(es)
116
- Math::sqrt(variance_esd_wr(es))
116
+ Math::sqrt(variance_esd_wr(es))
117
117
  end
118
118
 
119
119
  def proportion_variance_ksd_wor(es)
120
120
  n_total=calculate_n_total(es)
121
121
  es.inject(0){|a,h|
122
- val= (((h['N'].to_f / n_total)**2 * h['p']*(1-h['p'])) / (h['n'])) * (1- (h['n'].to_f / h['N']))
123
- a+val
122
+ val= (((h['N'].to_f / n_total)**2 * h['p']*(1-h['p'])) / (h['n'])) * (1- (h['n'].to_f / h['N']))
123
+ a+val
124
124
  }
125
125
  end
126
126
  def proportion_sd_ksd_wor(es)
@@ -141,19 +141,20 @@ module Statsample
141
141
  end
142
142
 
143
143
  def proportion_variance_esd_wor(es)
144
- n_total=n_total=calculate_n_total(es)
145
-
146
- sum=es.inject(0){|a,h|
147
- a=(h['N']**2 * (h['N']-h['n']) * h['p']*(1.0-h['p'])) / ((h['n']-1)*(h['N']-1))
148
- a+val
149
- }
150
- Math::sqrt(sum) * (1.0/n_total**2)
144
+ n_total=n_total=calculate_n_total(es)
145
+
146
+ sum=es.inject(0){|a,h|
147
+ a=(h['N']**2 * (h['N']-h['n']) * h['p']*(1.0-h['p'])) / ((h['n']-1)*(h['N']-1))
148
+ a+val
149
+ }
150
+ Math::sqrt(sum) * (1.0/n_total**2)
151
151
  end
152
152
  def proportion_sd_esd_wor(es)
153
153
  Math::sqrt(proportion_variance_ksd_wor(es))
154
154
  end
155
- end
156
- def initialize(ms,strata_sizes)
155
+ end
156
+
157
+ def initialize(ms,strata_sizes)
157
158
  raise TypeError,"ms should be a Multiset" unless ms.is_a? Statsample::Multiset
158
159
  @ms=ms
159
160
  raise ArgumentError,"You should put a strata size for each dataset" if strata_sizes.keys.sort!=ms.datasets_names
@@ -161,104 +162,104 @@ module Statsample
161
162
  @population_size=@strata_sizes.inject(0) {|a,x| a+x[1]}
162
163
  @strata_number=@ms.n_datasets
163
164
  @sample_size=@ms.datasets.inject(0) {|a,x| a+x[1].cases}
164
- end
165
- # Number of strata
166
- def strata_number
165
+ end
166
+ # Number of strata
167
+ def strata_number
167
168
  @strata_number
168
- end
169
- # Population size. Equal to sum of strata sizes
170
- # Symbol: N<sub>h</sub>
171
- def population_size
169
+ end
170
+ # Population size. Equal to sum of strata sizes
171
+ # Symbol: N<sub>h</sub>
172
+ def population_size
172
173
  @population_size
173
- end
174
- # Sample size. Equal to sum of sample of each stratum
175
- def sample_size
174
+ end
175
+ # Sample size. Equal to sum of sample of each stratum
176
+ def sample_size
176
177
  @sample_size
177
- end
178
- # Size of stratum x
179
- def stratum_size(h)
178
+ end
179
+ # Size of stratum x
180
+ def stratum_size(h)
180
181
  @strata_sizes[h]
181
- end
182
- def vectors_by_field(field)
182
+ end
183
+ def vectors_by_field(field)
183
184
  @ms.datasets.collect{|k,ds|
184
- ds[field]
185
+ ds[field]
185
186
  }
186
- end
187
- # Population proportion based on strata
188
- def proportion(field, v=1)
189
- @ms.sum_field(field) {|s_name,vector|
190
- stratum_ponderation(s_name)*vector.proportion(v)
191
- }
192
- end
193
- # Stratum ponderation.
194
- # Symbol: W\<sub>h\</sub>
195
- def stratum_ponderation(h)
196
- @strata_sizes[h].to_f / @population_size
197
- end
198
- alias_method :wh, :stratum_ponderation
199
-
200
- # Population mean based on strata
201
- def mean(field)
202
- @ms.sum_field(field) {|s_name,vector|
203
- stratum_ponderation(s_name)*vector.mean
204
- }
205
- end
206
- # Standard error with estimated population variance and without replacement.
207
- # Source: Cochran (1972)
208
- def standard_error_wor(field)
187
+ end
188
+ # Population proportion based on strata
189
+ def proportion(field, v=1)
190
+ @ms.sum_field(field) {|s_name,vector|
191
+ stratum_ponderation(s_name)*vector.proportion(v)
192
+ }
193
+ end
194
+ # Stratum ponderation.
195
+ # Symbol: W\<sub>h\</sub>
196
+ def stratum_ponderation(h)
197
+ @strata_sizes[h].to_f / @population_size
198
+ end
199
+ alias_method :wh, :stratum_ponderation
200
+
201
+ # Population mean based on strata
202
+ def mean(field)
203
+ @ms.sum_field(field) {|s_name,vector|
204
+ stratum_ponderation(s_name)*vector.mean
205
+ }
206
+ end
207
+ # Standard error with estimated population variance and without replacement.
208
+ # Source: Cochran (1972)
209
+ def standard_error_wor(field)
209
210
  es=@ms.collect_vector(field) {|s_n, vector|
210
- {'N'=>@strata_sizes[s_n],'n'=>vector.size, 's'=>vector.sds}
211
+ {'N'=>@strata_sizes[s_n],'n'=>vector.size, 's'=>vector.sds}
211
212
  }
212
213
 
213
214
  StratifiedSample.standard_error_esd_wor(es)
214
- end
215
-
216
- # Standard error with estimated population variance and without replacement.
217
- # Source: http://stattrek.com/Lesson6/STRAnalysis.aspx
218
-
219
- def standard_error_wor_2(field)
220
- sum=@ms.sum_field(field) {|s_name,vector|
221
- s_size=@strata_sizes[s_name]
222
- (s_size**2 * (1-(vector.size.to_f / s_size)) * vector.variance_sample / vector.size.to_f)
223
- }
215
+ end
216
+
217
+ # Standard error with estimated population variance and without replacement.
218
+ # Source: http://stattrek.com/Lesson6/STRAnalysis.aspx
219
+
220
+ def standard_error_wor_2(field)
221
+ sum=@ms.sum_field(field) {|s_name,vector|
222
+ s_size=@strata_sizes[s_name]
223
+ (s_size**2 * (1-(vector.size.to_f / s_size)) * vector.variance_sample / vector.size.to_f)
224
+ }
224
225
  (1/@population_size.to_f)*Math::sqrt(sum)
225
- end
226
-
227
- def standard_error_wr(field)
226
+ end
227
+
228
+ def standard_error_wr(field)
228
229
  es=@ms.collect_vector(field) {|s_n, vector|
229
- {'N'=>@strata_sizes[s_n],'n'=>vector.size, 's'=>vector.sds}
230
+ {'N'=>@strata_sizes[s_n],'n'=>vector.size, 's'=>vector.sds}
230
231
  }
231
232
 
232
233
  StratifiedSample.standard_error_esd_wr(es)
233
- end
234
- def proportion_sd_esd_wor(field,v=1)
234
+ end
235
+ def proportion_sd_esd_wor(field,v=1)
235
236
  es=@ms.collect_vector(field) {|s_n, vector|
236
- {'N'=>@strata_sizes[s_n],'n'=>vector.size, 'p'=>vector.proportion(v)}
237
+ {'N'=>@strata_sizes[s_n],'n'=>vector.size, 'p'=>vector.proportion(v)}
237
238
  }
238
239
 
239
240
  StratifiedSample.proportion_sd_esd_wor(es)
240
- end
241
-
242
- def proportion_standard_error(field,v=1)
241
+ end
242
+
243
+ def proportion_standard_error(field,v=1)
243
244
  prop=proportion(field,v)
244
245
  sum=@ms.sum_field(field) {|s_name,vector|
245
- nh=vector.size
246
- s_size=@strata_sizes[s_name]
247
- (s_size**2 * (1-(nh/s_size)) * prop * (1-prop) / (nh -1 ))
246
+ nh=vector.size
247
+ s_size=@strata_sizes[s_name]
248
+ (s_size**2 * (1-(nh/s_size)) * prop * (1-prop) / (nh -1 ))
248
249
  }
249
250
  (1.quo(@population_size)) * Math::sqrt(sum)
250
- end
251
- # Cochran(1971), p. 150
252
- def variance_pst(field,v=1)
251
+ end
252
+ # Cochran(1971), p. 150
253
+ def variance_pst(field,v=1)
253
254
  sum=@ms.datasets.inject(0) {|a,da|
254
- stratum_name=da[0]
255
- ds=da[1]
256
- nh=ds.cases.to_f
257
- s_size=@strata_sizes[stratum_name]
258
- prop=ds[field].proportion(v)
259
- a + (((s_size**2 * (s_size-nh)) / (s_size-1))*(prop*(1-prop) / (nh-1)))
255
+ stratum_name=da[0]
256
+ ds=da[1]
257
+ nh=ds.cases.to_f
258
+ s_size=@strata_sizes[stratum_name]
259
+ prop=ds[field].proportion(v)
260
+ a + (((s_size**2 * (s_size-nh)) / (s_size-1))*(prop*(1-prop) / (nh-1)))
260
261
  }
261
262
  (1/@population_size.to_f ** 2)*sum
262
- end
263
+ end
263
264
  end
264
265
  end
@@ -14,7 +14,8 @@ module Statsample
14
14
  # perm=Statsample::Permutation.new([0,0,1,1])
15
15
  # => [[0,0,1,1],[0,1,0,1],[0,1,1,0],[1,0,0,1],[1,0,1,0],[1,1,0,0]]
16
16
  #
17
- # Reference: http://www.cut-the-knot.org/do_you_know/AllPerm.shtml
17
+ # == Reference:
18
+ # * http://www.cut-the-knot.org/do_you_know/AllPerm.shtml
18
19
  class Permutation
19
20
  attr_reader :permutation_number
20
21
  def initialize(v)
@@ -1,11 +1,12 @@
1
1
  module Statsample
2
2
  module Regression
3
3
  module Binomial
4
- # Logistic Regression
4
+ # Logistic Regression class.
5
+ # See Statsample::Regression::Binomial::BaseEngine for documentation
5
6
  class Logit < BaseEngine
6
7
  def initialize(ds,y_var)
7
- model=Statsample::MLE::Logit.new
8
- super(ds,y_var,model)
8
+ model=Statsample::MLE::Logit.new
9
+ super(ds,y_var,model)
9
10
  end
10
11
  end
11
12
  end
@@ -1,7 +1,8 @@
1
1
  module Statsample
2
2
  module Regression
3
3
  module Binomial
4
- # Logistic Regression
4
+ # Probit Regression
5
+ # See Statsample::Regression::Binomial::BaseEngine for documentation
5
6
  class Probit < BaseEngine
6
7
  def initialize(ds,y_var)
7
8
  model=Statsample::MLE::Probit.new
@@ -1,91 +1,72 @@
1
1
  module Statsample
2
2
  module Regression
3
3
  module Binomial
4
- # Create a Logit model object.
5
- # ds:: Dataset
6
- # y:: Name of dependent vector
7
- # Use
8
- # dataset=Statsample::CSV.read("data.csv")
9
- # y="y"
10
- # lr=Statsample::Regression::Binomial.logit(dataset,y)
11
- #
12
- def self.logit(ds,y_var)
13
- Logit.new(ds,y_var)
14
- end
15
- # Create a Probit model object.
16
- # ds:: Dataset
17
- # y:: Name of dependent vector
18
- # Use
19
- # dataset=Statsample::CSV.read("data.csv")
20
- # y="y"
21
- # lr=Statsample::Regression::Binomial.probit(dataset,y)
22
- #
23
-
24
- def self.probit(ds,y_var)
25
- Probit.new(ds,y_var)
26
- end
27
4
  # Base Engine for binomial regression analysis.
28
- # See Statsample::Regression::Binomial.logit() and
29
- # Statsample::Regression::Binomial.probit for fast
30
- # access methods.
31
- #
32
- # Use:
33
- # dataset=Statsample::CSV.read("data.csv")
34
- # y="y"
35
- # model=Statsample::MLE::Logit.new
36
- # lr=Statsample::Regression::Binomial::BaseEngine(dataset, y, model)
37
- class BaseEngine
38
- attr_reader :log_likehood, :iterations
39
- def initialize(ds,y_var,model)
40
- @ds=ds
41
- @y_var=y_var
42
- @dy=@ds[@y_var]
43
- @ds_indep=ds.dup(ds.fields-[y_var])
44
- constant=([1.0]*ds.cases).to_vector(:scale)
45
- @ds_indep.add_vector("_constant",constant)
46
- mat_x=@ds_indep.to_matrix
47
- mat_y=@dy.to_matrix(:vertical)
48
- @fields=@ds_indep.fields
49
- @model=model
50
- coeffs=model.newton_raphson(mat_x, mat_y)
51
- @coeffs=assign_names(coeffs.column(0).to_a)
52
- @iterations=model.iterations
53
- @var_cov_matrix=model.var_cov_matrix
54
- @log_likehood=model.log_likehood(mat_x, mat_y, coeffs)
55
- end # init
56
- # Coefficients standard error
57
- def coeffs_se
58
- out={}
59
- @fields.each_index{|i|
5
+ # Use Statsample::Regression.logit and Statsample::Regression.probit
6
+ # for fast access methods.
7
+ #
8
+ # == Usage:
9
+ # dataset=Statsample::CSV.read("data.csv")
10
+ # y="y"
11
+ # model=Statsample::MLE::Logit.new
12
+ # lr=Statsample::Regression::Binomial::BaseEngine.new(dataset, y, model)
13
+ class BaseEngine
14
+ attr_reader :log_likehood, :iterations
15
+ # Parameters
16
+ # * ds: Dataset
17
+ # * y_var: Name of dependent variable
18
+ # * model: MLE model instance, e.g. Statsample::MLE::Logit.new or Statsample::MLE::Probit.new
19
+ def initialize(ds,y_var,model)
20
+ @ds=ds
21
+ @y_var=y_var
22
+ @dy=@ds[@y_var]
23
+ @ds_indep=ds.dup(ds.fields-[y_var])
24
+ constant=([1.0]*ds.cases).to_vector(:scale)
25
+ @ds_indep.add_vector("_constant",constant)
26
+ mat_x=@ds_indep.to_matrix
27
+ mat_y=@dy.to_matrix(:vertical)
28
+ @fields=@ds_indep.fields
29
+ @model=model
30
+ coeffs=model.newton_raphson(mat_x, mat_y)
31
+ @coeffs=assign_names(coeffs.column(0).to_a)
32
+ @iterations=model.iterations
33
+ @var_cov_matrix=model.var_cov_matrix
34
+ @log_likehood=model.log_likehood(mat_x, mat_y, coeffs)
35
+ end # init
36
+ # Coefficients standard error
37
+ def coeffs_se
38
+ out={}
39
+ @fields.each_index{|i|
60
40
  f=@fields[i]
61
41
  out[f]=Math::sqrt(@var_cov_matrix[i,i])
62
- }
63
- out.delete("_constant")
64
- out
65
- end
66
- # Constant value
67
- def constant
68
- @coeffs['_constant']
69
- end
70
- # Regression coefficients
71
- def coeffs
72
- c=@coeffs.dup
73
- c.delete("_constant")
74
- c
75
- end
76
- # Constant standard error
77
- def constant_se
78
- i=@fields.index :_constant
79
- Math::sqrt(@var_cov_matrix[i,i])
80
- end
81
- def assign_names(c)
82
- a={}
83
- @fields.each_index do |i|
84
- a[@fields[i]]=c[i]
42
+ }
43
+ out.delete("_constant")
44
+ out
45
+ end
46
+ # Value of constant on regression
47
+ def constant
48
+ @coeffs['_constant']
49
+ end
50
+ # Constant standard error
51
+ def constant_se
52
+ i=@fields.index :_constant
53
+ Math::sqrt(@var_cov_matrix[i,i])
54
+ end
55
+ # Regression coefficients
56
+ def coeffs
57
+ c=@coeffs.dup
58
+ c.delete("_constant")
59
+ c
60
+ end
61
+
62
+ def assign_names(c) # :nodoc:
63
+ a={}
64
+ @fields.each_index do |i|
65
+ a[@fields[i]]=c[i]
66
+ end
67
+ a
85
68
  end
86
- a
87
- end
88
69
  end # Base Engine
89
- end # Dichotomic
70
+ end # Binomial
90
71
  end # Regression
91
72
  end # Statsample
@@ -110,7 +110,7 @@ module Statsample
110
110
  ds.each{|k,v|
111
111
  ds[k]=v.to_vector(:scale)
112
112
  }
113
- lr=Multiple.listwise(ds.to_dataset,var)
113
+ lr=self.class.new(ds.to_dataset,var)
114
114
  1-lr.r2
115
115
  end
116
116
  # Tolerances for each coefficient