statsample 0.6.5 → 0.6.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. data/History.txt +15 -0
  2. data/Manifest.txt +6 -0
  3. data/README.txt +30 -12
  4. data/Rakefile +91 -0
  5. data/demo/levene.rb +9 -0
  6. data/demo/multiple_regression.rb +1 -7
  7. data/demo/polychoric.rb +1 -0
  8. data/demo/principal_axis.rb +8 -0
  9. data/lib/distribution/f.rb +22 -22
  10. data/lib/spss.rb +99 -99
  11. data/lib/statsample/bivariate/polychoric.rb +32 -22
  12. data/lib/statsample/bivariate/tetrachoric.rb +212 -207
  13. data/lib/statsample/bivariate.rb +6 -6
  14. data/lib/statsample/codification.rb +65 -65
  15. data/lib/statsample/combination.rb +60 -59
  16. data/lib/statsample/converter/csv19.rb +12 -12
  17. data/lib/statsample/converters.rb +1 -1
  18. data/lib/statsample/dataset.rb +93 -36
  19. data/lib/statsample/dominanceanalysis/bootstrap.rb +66 -3
  20. data/lib/statsample/dominanceanalysis.rb +5 -6
  21. data/lib/statsample/factor/pca.rb +41 -11
  22. data/lib/statsample/factor/principalaxis.rb +105 -29
  23. data/lib/statsample/factor/rotation.rb +20 -3
  24. data/lib/statsample/factor.rb +1 -1
  25. data/lib/statsample/graph/gdchart.rb +13 -13
  26. data/lib/statsample/graph/svggraph.rb +166 -167
  27. data/lib/statsample/matrix.rb +22 -12
  28. data/lib/statsample/mle/logit.rb +3 -2
  29. data/lib/statsample/mle/probit.rb +7 -5
  30. data/lib/statsample/mle.rb +4 -2
  31. data/lib/statsample/multiset.rb +125 -124
  32. data/lib/statsample/permutation.rb +2 -1
  33. data/lib/statsample/regression/binomial/logit.rb +4 -3
  34. data/lib/statsample/regression/binomial/probit.rb +2 -1
  35. data/lib/statsample/regression/binomial.rb +62 -81
  36. data/lib/statsample/regression/multiple/baseengine.rb +1 -1
  37. data/lib/statsample/regression/multiple/gslengine.rb +1 -1
  38. data/lib/statsample/regression/multiple/matrixengine.rb +12 -6
  39. data/lib/statsample/regression/multiple.rb +15 -42
  40. data/lib/statsample/regression/simple.rb +93 -78
  41. data/lib/statsample/regression.rb +74 -2
  42. data/lib/statsample/reliability.rb +117 -120
  43. data/lib/statsample/srs.rb +156 -153
  44. data/lib/statsample/test/levene.rb +90 -0
  45. data/lib/statsample/test/umannwhitney.rb +25 -9
  46. data/lib/statsample/test.rb +2 -0
  47. data/lib/statsample/vector.rb +388 -413
  48. data/lib/statsample.rb +74 -30
  49. data/po/es/statsample.mo +0 -0
  50. data/test/test_bivariate.rb +5 -4
  51. data/test/test_combination.rb +1 -1
  52. data/test/test_dataset.rb +2 -2
  53. data/test/test_factor.rb +53 -6
  54. data/test/test_gsl.rb +1 -1
  55. data/test/test_mle.rb +1 -1
  56. data/test/test_regression.rb +18 -33
  57. data/test/test_statistics.rb +15 -33
  58. data/test/test_stest.rb +35 -0
  59. data/test/test_svg_graph.rb +2 -2
  60. data/test/test_vector.rb +331 -333
  61. metadata +38 -11
@@ -92,33 +92,43 @@ module GSL
   end
 
 module Statsample
-  # Method for variance/covariance and correlation matrices
+  # Module to add method for variance/covariance and correlation matrices
+  # == Usage
+  #  matrix=Matrix[[1,2],[2,3]]
+  #  matrix.extend CovariateMatrix
+  #
   module CovariateMatrix
+    # Gives a nice
     def summary
       rp=ReportBuilder.new()
       rp.add(self)
       rp.to_text
     end
-    def type=(v)
-      @type=v
-    end
+    # Get type of covariate matrix. Could be :covariance or :correlation
     def type
-      if row_size.times.find {|i| self[i,i]!=1.0}
-        :covariance
+      if row_size==column_size
+        if row_size.times.find {|i| self[i,i]!=1.0}
+          :covariance
+        else
+          :correlation
+        end
       else
-        :correlation
+        @type
       end
 
     end
+    def type=(t)
+      @type=t
+    end
     def correlation
       if(type==:covariance)
         matrix=Matrix.rows(row_size.times.collect { |i|
           column_size.times.collect { |j|
-            if i==j
-              1.0
-            else
-              self[i,j].quo(Math::sqrt(self[i,i])*Math::sqrt(self[j,j]))
-            end
+            if i==j
+              1.0
+            else
+              self[i,j].quo(Math::sqrt(self[i,i])*Math::sqrt(self[j,j]))
+            end
           }
         })
         matrix.extend CovariateMatrix
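
The reworked type method above only inspects the diagonal when the matrix is square; a non-square matrix now falls back to whatever was assigned through type=. A minimal sketch of the documented usage, based only on the comment and code in this hunk (the sample matrices are illustrative):

  require 'matrix'
  require 'statsample'

  # Square matrix with a unit diagonal: #type infers :correlation
  r = Matrix[[1.0, 0.4], [0.4, 1.0]]
  r.extend Statsample::CovariateMatrix
  r.type         # => :correlation

  # Square matrix with a non-unit diagonal: #type infers :covariance
  s = Matrix[[2.0, 0.8], [0.8, 1.5]]
  s.extend Statsample::CovariateMatrix
  s.type         # => :covariance
  s.correlation  # rescales each entry by self[i,j] / sqrt(self[i,i]*self[j,j])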
@@ -1,12 +1,13 @@
 module Statsample
   module MLE
-    # Logit MLE estimation.
+    # Logit MLE estimation.
+    # See Statsample::Regression for methods to generate a logit regression.
     # Usage:
     #
     #  mle=Statsample::MLE::Logit.new
     #  mle.newton_raphson(x,y)
     #  beta=mle.parameters
-    #  likehood=mle.likehood(x,y,beta)
+    #  likehood=mle.likehood(x, y, beta)
     #  iterations=mle.iterations
     #
     class Logit < BaseMLE
@@ -1,7 +1,9 @@
 module Statsample
   module MLE
     # Probit MLE estimation.
-    # Usage:
+    # See Statsample::Regression for methods to generate a probit regression.
+    #
+    # == Usage:
     #
     #  mle=Statsample::MLE::Probit.new
     #  mle.newton_raphson(x,y)
@@ -10,7 +12,7 @@ module Statsample
     #  iterations=mle.iterations
     class Probit < BaseMLE
       # F(B'Xi)
-      if HAS_GSL
+      if Statsample.has_gsl?
         # F(B'Xi)
         def f(b,x)
           p_bx=(x*b)[0,0]
@@ -67,7 +69,7 @@ module Statsample
         raise "x.columns!=p.rows" if x.column_size!=b.row_size
         n = x.row_size
         k = x.column_size
-        if HAS_GSL
+        if Statsample.has_gsl?
           sum=GSL::Matrix.zeros(k)
         else
           sum=Matrix.zero(k)
@@ -76,12 +78,12 @@ module Statsample
           xi=Matrix.rows([x.row(i).to_a])
           fbx=f(b,xi)
           val=((ff(b,xi)**2) / (fbx*(1.0-fbx)))*xi.t*xi
-          if HAS_GSL
+          if Statsample.has_gsl?
             val=val.to_gsl
           end
           sum-=val
         end
-        if HAS_GSL
+        if Statsample.has_gsl?
           sum=sum.to_matrix
         end
         sum
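
The probit code above now calls Statsample.has_gsl? instead of reading a HAS_GSL constant, so the GSL-backed branches are chosen by a runtime query. The predicate itself is not part of this hunk; the following is only a hypothetical sketch of the usual pattern for such a check, not the library's actual implementation:

  module Statsample
    # Hypothetical sketch: memoize whether the optional gsl gem can be loaded.
    def self.has_gsl?
      return @has_gsl unless @has_gsl.nil?
      @has_gsl = begin
        require 'gsl'
        true
      rescue LoadError
        false
      end
    end
  end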
@@ -1,7 +1,9 @@
 module Statsample
-  # Module for MLE calculations.
+  # Module for generic MLE calculations.
   # Use subclass of BaseMLE for specific MLE model estimation.
-  # Usage:
+  # You should visit Statsample::Regression for method to perform fast
+  # regression analysis.
+  # == Usage:
   #
   #  mle=Statsample::MLE::Probit.new
   #  mle.newton_raphson(x,y)
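
The Logit and Probit estimators share the workflow spelled out in the comments above: build the estimator, run newton_raphson on an X matrix (one row per case, one column per predictor including a constant) and a column matrix of 0/1 outcomes, then read the results. A minimal sketch that follows the documented usage; the toy matrices are illustrative only:

  require 'statsample'

  x = Matrix[[1.0, 0.0], [1.0, 1.0], [1.0, 2.0], [1.0, 3.0]]  # constant column + one predictor
  y = Matrix.columns([[0, 1, 0, 1]])                          # one 0/1 outcome per row of x

  mle = Statsample::MLE::Logit.new
  mle.newton_raphson(x, y)
  beta       = mle.parameters
  likehood   = mle.likehood(x, y, beta)   # the library's spelling of "likelihood"
  iterations = mle.iterations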
@@ -24,53 +24,53 @@ module Statsample
       @datasets.size
     end
     def add_dataset(key,ds)
-      if(ds.fields!=@fields)
-        raise ArgumentError, "Dataset(#{ds.fields.to_s})must have the same fields of the Multiset(#{@fields})"
-      else
-        @datasets[key]=ds
-      end
+      if(ds.fields!=@fields)
+        raise ArgumentError, "Dataset(#{ds.fields.to_s})must have the same fields of the Multiset(#{@fields})"
+      else
+        @datasets[key]=ds
+      end
     end
     def sum_field(field)
-      @datasets.inject(0) {|a,da|
-        stratum_name=da[0]
-        vector=da[1][field]
-        val=yield stratum_name,vector
-        a+val
-      }
+      @datasets.inject(0) {|a,da|
+        stratum_name=da[0]
+        vector=da[1][field]
+        val=yield stratum_name,vector
+        a+val
+      }
     end
     def collect_vector(field)
-      @datasets.collect {|k,v|
-        yield k, v[field]
-      }
+      @datasets.collect {|k,v|
+        yield k, v[field]
+      }
     end
     def[](i)
-      @datasets[i]
-    end
+      @datasets[i]
     end
-  class StratifiedSample
-    class << self
-      # mean for an array of vectors
-      def mean(*vectors)
-        n_total=0
-        means=vectors.inject(0){|a,v|
-          n_total+=v.size
-          a+v.sum
-        }
-        means.to_f/n_total
-      end
-
+  end
+  class StratifiedSample
+    class << self
+      # mean for an array of vectors
+      def mean(*vectors)
+        n_total=0
+        means=vectors.inject(0){|a,v|
+          n_total+=v.size
+          a+v.sum
+        }
+        means.to_f/n_total
+      end
+
       def standard_error_ksd_wr(es)
-        n_total=0
-        sum=es.inject(0){|a,h|
-          n_total+=h['N']
-          a+((h['N']**2 * h['s']**2) / h['n'].to_f)
-        }
-        (1.to_f / n_total)*Math::sqrt(sum)
+        n_total=0
+        sum=es.inject(0){|a,h|
+          n_total+=h['N']
+          a+((h['N']**2 * h['s']**2) / h['n'].to_f)
+        }
+        (1.to_f / n_total)*Math::sqrt(sum)
       end
 
 
       def variance_ksd_wr(es)
-        standard_error_ksd_wr(es)**2
+        standard_error_ksd_wr(es)**2
       end
       def calculate_n_total(es)
         es.inject(0) {|a,h| a+h['N'] }
@@ -85,7 +85,7 @@ module Statsample
         }
       end
       def standard_error_ksd_wor(es)
-        Math::sqrt(variance_ksd_wor(es))
+        Math::sqrt(variance_ksd_wor(es))
      end
 
 
@@ -101,26 +101,26 @@ module Statsample
 
 
       def standard_error_esd_wor(es)
-        Math::sqrt(variance_ksd_wor(es))
+        Math::sqrt(variance_ksd_wor(es))
       end
       # Based on http://stattrek.com/Lesson6/STRAnalysis.aspx
       def variance_esd_wr(es)
         n_total=calculate_n_total(es)
         sum=es.inject(0){|a,h|
-          val= ((h['s']**2 * h['N']**2) / h['n'].to_f)
-          a+val
+          val= ((h['s']**2 * h['N']**2) / h['n'].to_f)
+          a+val
         }
         (1.0/(n_total**2))*sum
       end
       def standard_error_esd_wr(es)
-        Math::sqrt(variance_esd_wr(es))
+        Math::sqrt(variance_esd_wr(es))
       end
 
       def proportion_variance_ksd_wor(es)
         n_total=calculate_n_total(es)
         es.inject(0){|a,h|
-          val= (((h['N'].to_f / n_total)**2 * h['p']*(1-h['p'])) / (h['n'])) * (1- (h['n'].to_f / h['N']))
-          a+val
+          val= (((h['N'].to_f / n_total)**2 * h['p']*(1-h['p'])) / (h['n'])) * (1- (h['n'].to_f / h['N']))
+          a+val
         }
       end
       def proportion_sd_ksd_wor(es)
@@ -141,19 +141,20 @@ module Statsample
       end
 
       def proportion_variance_esd_wor(es)
-        n_total=n_total=calculate_n_total(es)
-
-        sum=es.inject(0){|a,h|
-          a=(h['N']**2 * (h['N']-h['n']) * h['p']*(1.0-h['p'])) / ((h['n']-1)*(h['N']-1))
-          a+val
-        }
-        Math::sqrt(sum) * (1.0/n_total**2)
+        n_total=n_total=calculate_n_total(es)
+
+        sum=es.inject(0){|a,h|
+          a=(h['N']**2 * (h['N']-h['n']) * h['p']*(1.0-h['p'])) / ((h['n']-1)*(h['N']-1))
+          a+val
+        }
+        Math::sqrt(sum) * (1.0/n_total**2)
       end
       def proportion_sd_esd_wor(es)
         Math::sqrt(proportion_variance_ksd_wor(es))
       end
-    end
-    def initialize(ms,strata_sizes)
+    end
+
+    def initialize(ms,strata_sizes)
       raise TypeError,"ms should be a Multiset" unless ms.is_a? Statsample::Multiset
       @ms=ms
       raise ArgumentError,"You should put a strata size for each dataset" if strata_sizes.keys.sort!=ms.datasets_names
@@ -161,104 +162,104 @@ module Statsample
       @population_size=@strata_sizes.inject(0) {|a,x| a+x[1]}
       @strata_number=@ms.n_datasets
       @sample_size=@ms.datasets.inject(0) {|a,x| a+x[1].cases}
-    end
-    # Number of strata
-    def strata_number
+    end
+    # Number of strata
+    def strata_number
       @strata_number
-    end
-    # Population size. Equal to sum of strata sizes
-    # Symbol: N<sub>h</sub>
-    def population_size
+    end
+    # Population size. Equal to sum of strata sizes
+    # Symbol: N<sub>h</sub>
+    def population_size
       @population_size
-    end
-    # Sample size. Equal to sum of sample of each stratum
-    def sample_size
+    end
+    # Sample size. Equal to sum of sample of each stratum
+    def sample_size
       @sample_size
-    end
-    # Size of stratum x
-    def stratum_size(h)
+    end
+    # Size of stratum x
+    def stratum_size(h)
       @strata_sizes[h]
-    end
-    def vectors_by_field(field)
+    end
+    def vectors_by_field(field)
       @ms.datasets.collect{|k,ds|
-        ds[field]
+        ds[field]
       }
-    end
-    # Population proportion based on strata
-    def proportion(field, v=1)
-      @ms.sum_field(field) {|s_name,vector|
-        stratum_ponderation(s_name)*vector.proportion(v)
-      }
-    end
-    # Stratum ponderation.
-    # Symbol: W\<sub>h\</sub>
-    def stratum_ponderation(h)
-      @strata_sizes[h].to_f / @population_size
-    end
-    alias_method :wh, :stratum_ponderation
-
-    # Population mean based on strata
-    def mean(field)
-      @ms.sum_field(field) {|s_name,vector|
-        stratum_ponderation(s_name)*vector.mean
-      }
-    end
-    # Standard error with estimated population variance and without replacement.
-    # Source: Cochran (1972)
-    def standard_error_wor(field)
+    end
+    # Population proportion based on strata
+    def proportion(field, v=1)
+      @ms.sum_field(field) {|s_name,vector|
+        stratum_ponderation(s_name)*vector.proportion(v)
+      }
+    end
+    # Stratum ponderation.
+    # Symbol: W\<sub>h\</sub>
+    def stratum_ponderation(h)
+      @strata_sizes[h].to_f / @population_size
+    end
+    alias_method :wh, :stratum_ponderation
+
+    # Population mean based on strata
+    def mean(field)
+      @ms.sum_field(field) {|s_name,vector|
+        stratum_ponderation(s_name)*vector.mean
+      }
+    end
+    # Standard error with estimated population variance and without replacement.
+    # Source: Cochran (1972)
+    def standard_error_wor(field)
       es=@ms.collect_vector(field) {|s_n, vector|
-        {'N'=>@strata_sizes[s_n],'n'=>vector.size, 's'=>vector.sds}
+        {'N'=>@strata_sizes[s_n],'n'=>vector.size, 's'=>vector.sds}
       }
 
       StratifiedSample.standard_error_esd_wor(es)
-    end
-
-    # Standard error with estimated population variance and without replacement.
-    # Source: http://stattrek.com/Lesson6/STRAnalysis.aspx
-
-    def standard_error_wor_2(field)
-      sum=@ms.sum_field(field) {|s_name,vector|
-        s_size=@strata_sizes[s_name]
-        (s_size**2 * (1-(vector.size.to_f / s_size)) * vector.variance_sample / vector.size.to_f)
-      }
+    end
+
+    # Standard error with estimated population variance and without replacement.
+    # Source: http://stattrek.com/Lesson6/STRAnalysis.aspx
+
+    def standard_error_wor_2(field)
+      sum=@ms.sum_field(field) {|s_name,vector|
+        s_size=@strata_sizes[s_name]
+        (s_size**2 * (1-(vector.size.to_f / s_size)) * vector.variance_sample / vector.size.to_f)
+      }
       (1/@population_size.to_f)*Math::sqrt(sum)
-    end
-
-    def standard_error_wr(field)
+    end
+
+    def standard_error_wr(field)
       es=@ms.collect_vector(field) {|s_n, vector|
-        {'N'=>@strata_sizes[s_n],'n'=>vector.size, 's'=>vector.sds}
+        {'N'=>@strata_sizes[s_n],'n'=>vector.size, 's'=>vector.sds}
       }
 
       StratifiedSample.standard_error_esd_wr(es)
-    end
-    def proportion_sd_esd_wor(field,v=1)
+    end
+    def proportion_sd_esd_wor(field,v=1)
       es=@ms.collect_vector(field) {|s_n, vector|
-        {'N'=>@strata_sizes[s_n],'n'=>vector.size, 'p'=>vector.proportion(v)}
+        {'N'=>@strata_sizes[s_n],'n'=>vector.size, 'p'=>vector.proportion(v)}
       }
 
       StratifiedSample.proportion_sd_esd_wor(es)
-    end
-
-    def proportion_standard_error(field,v=1)
+    end
+
+    def proportion_standard_error(field,v=1)
       prop=proportion(field,v)
       sum=@ms.sum_field(field) {|s_name,vector|
-        nh=vector.size
-        s_size=@strata_sizes[s_name]
-        (s_size**2 * (1-(nh/s_size)) * prop * (1-prop) / (nh -1 ))
+        nh=vector.size
+        s_size=@strata_sizes[s_name]
+        (s_size**2 * (1-(nh/s_size)) * prop * (1-prop) / (nh -1 ))
      }
      (1.quo(@population_size)) * Math::sqrt(sum)
-    end
-    # Cochran(1971), p. 150
-    def variance_pst(field,v=1)
+    end
+    # Cochran(1971), p. 150
+    def variance_pst(field,v=1)
       sum=@ms.datasets.inject(0) {|a,da|
-        stratum_name=da[0]
-        ds=da[1]
-        nh=ds.cases.to_f
-        s_size=@strata_sizes[stratum_name]
-        prop=ds[field].proportion(v)
-        a + (((s_size**2 * (s_size-nh)) / (s_size-1))*(prop*(1-prop) / (nh-1)))
+        stratum_name=da[0]
+        ds=da[1]
+        nh=ds.cases.to_f
+        s_size=@strata_sizes[stratum_name]
+        prop=ds[field].proportion(v)
+        a + (((s_size**2 * (s_size-nh)) / (s_size-1))*(prop*(1-prop) / (nh-1)))
       }
       (1/@population_size.to_f ** 2)*sum
-    end
+    end
   end
 end
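
Taken together, the methods above document the stratified estimators: the population mean and proportion are ponderation-weighted sums over strata (W_h = N_h / N via stratum_ponderation), and the *_wr / *_wor helpers give standard errors with and without replacement. A minimal sketch of how the pieces fit, assuming Multiset.new takes the list of field names (as the field check in add_dataset suggests); the data and strata sizes are illustrative:

  require 'statsample'

  men   = {'income' => [10, 12, 14, 13].to_vector(:scale)}.to_dataset
  women = {'income' => [11, 15, 16, 14].to_vector(:scale)}.to_dataset

  ms = Statsample::Multiset.new(['income'])   # assumed constructor signature
  ms.add_dataset('men',   men)
  ms.add_dataset('women', women)

  # Population strata sizes: N_men=600, N_women=400
  sample = Statsample::StratifiedSample.new(ms, 'men' => 600, 'women' => 400)
  sample.mean('income')                # sum over strata of W_h * stratum mean
  sample.standard_error_wor('income')  # without replacement (Cochran)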
@@ -14,7 +14,8 @@ module Statsample
   #   perm=Statsample::Permutation.new([0,0,1,1])
   #   => [[0,0,1,1],[0,1,0,1],[0,1,1,0],[1,0,0,1],[1,0,1,0],[1,1,0,0]]
   #
-  # Reference: http://www.cut-the-knot.org/do_you_know/AllPerm.shtml
+  # == Reference:
+  # * http://www.cut-the-knot.org/do_you_know/AllPerm.shtml
   class Permutation
     attr_reader :permutation_number
     def initialize(v)
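
A tiny usage sketch matching the comment above; permutation_number is the attr_reader shown in this hunk, and the six arrays listed in the comment are the documented output for this input:

  perm = Statsample::Permutation.new([0, 0, 1, 1])
  perm.permutation_number   # number of permutations generated for the input vector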
@@ -1,11 +1,12 @@
 module Statsample
   module Regression
     module Binomial
-      # Logistic Regression
+      # Logistic Regression class.
+      # See Statsample::Regression::Binomial::BaseEngine for documentation
       class Logit < BaseEngine
         def initialize(ds,y_var)
-          model=Statsample::MLE::Logit.new
-          super(ds,y_var,model)
+          model=Statsample::MLE::Logit.new
+          super(ds,y_var,model)
         end
       end
     end
@@ -1,7 +1,8 @@
 module Statsample
   module Regression
     module Binomial
-      # Logistic Regression
+      # Probit Regression
+      # See Statsample::Regression::Binomial::BaseEngine for documentation
       class Probit < BaseEngine
         def initialize(ds,y_var)
           model=Statsample::MLE::Probit.new
@@ -1,91 +1,72 @@
 module Statsample
   module Regression
     module Binomial
-      # Create a Logit model object.
-      # ds:: Dataset
-      # y:: Name of dependent vector
-      # Use
-      #   dataset=Statsample::CSV.read("data.csv")
-      #   y="y"
-      #   lr=Statsample::Regression::Binomial.logit(dataset,y)
-      #
-      def self.logit(ds,y_var)
-        Logit.new(ds,y_var)
-      end
-      # Create a Probit model object.
-      # ds:: Dataset
-      # y:: Name of dependent vector
-      # Use
-      #   dataset=Statsample::CSV.read("data.csv")
-      #   y="y"
-      #   lr=Statsample::Regression::Binomial.probit(dataset,y)
-      #
-
-      def self.probit(ds,y_var)
-        Probit.new(ds,y_var)
-      end
       # Base Engine for binomial regression analysis.
-      # See Statsample::Regression::Binomial.logit() and
-      # Statsample::Regression::Binomial.probit for fast
-      # access methods.
-      #
-      # Use:
-      #   dataset=Statsample::CSV.read("data.csv")
-      #   y="y"
-      #   model=Statsample::MLE::Logit.new
-      #   lr=Statsample::Regression::Binomial::BaseEngine(dataset, y, model)
-      class BaseEngine
-        attr_reader :log_likehood, :iterations
-        def initialize(ds,y_var,model)
-          @ds=ds
-          @y_var=y_var
-          @dy=@ds[@y_var]
-          @ds_indep=ds.dup(ds.fields-[y_var])
-          constant=([1.0]*ds.cases).to_vector(:scale)
-          @ds_indep.add_vector("_constant",constant)
-          mat_x=@ds_indep.to_matrix
-          mat_y=@dy.to_matrix(:vertical)
-          @fields=@ds_indep.fields
-          @model=model
-          coeffs=model.newton_raphson(mat_x, mat_y)
-          @coeffs=assign_names(coeffs.column(0).to_a)
-          @iterations=model.iterations
-          @var_cov_matrix=model.var_cov_matrix
-          @log_likehood=model.log_likehood(mat_x, mat_y, coeffs)
-        end # init
-        # Coefficients standard error
-        def coeffs_se
-          out={}
-          @fields.each_index{|i|
+      # Use Statsample::Regression.logit and Statsample::Regression.probit
+      # for fast access methods.
+      #
+      # == Usage:
+      #   dataset=Statsample::CSV.read("data.csv")
+      #   y="y"
+      #   model=Statsample::MLE::Logit.new
+      #   lr=Statsample::Regression::Binomial::BaseEngine(dataset, y, model)
+      class BaseEngine
+        attr_reader :log_likehood, :iterations
+        # Parameters
+        # * ds: Dataset
+        # * y_var: Name of dependent variable
+        # * model: One of Statsample::Regression::Binomial classes
+        def initialize(ds,y_var,model)
+          @ds=ds
+          @y_var=y_var
+          @dy=@ds[@y_var]
+          @ds_indep=ds.dup(ds.fields-[y_var])
+          constant=([1.0]*ds.cases).to_vector(:scale)
+          @ds_indep.add_vector("_constant",constant)
+          mat_x=@ds_indep.to_matrix
+          mat_y=@dy.to_matrix(:vertical)
+          @fields=@ds_indep.fields
+          @model=model
+          coeffs=model.newton_raphson(mat_x, mat_y)
+          @coeffs=assign_names(coeffs.column(0).to_a)
+          @iterations=model.iterations
+          @var_cov_matrix=model.var_cov_matrix
+          @log_likehood=model.log_likehood(mat_x, mat_y, coeffs)
+        end # init
+        # Coefficients standard error
+        def coeffs_se
+          out={}
+          @fields.each_index{|i|
             f=@fields[i]
             out[f]=Math::sqrt(@var_cov_matrix[i,i])
-          }
-          out.delete("_constant")
-          out
-        end
-        # Constant value
-        def constant
-          @coeffs['_constant']
-        end
-        # Regression coefficients
-        def coeffs
-          c=@coeffs.dup
-          c.delete("_constant")
-          c
-        end
-        # Constant standard error
-        def constant_se
-          i=@fields.index :_constant
-          Math::sqrt(@var_cov_matrix[i,i])
-        end
-        def assign_names(c)
-          a={}
-          @fields.each_index do |i|
-            a[@fields[i]]=c[i]
+          }
+          out.delete("_constant")
+          out
+        end
+        # Value of constant on regression
+        def constant
+          @coeffs['_constant']
+        end
+        # Constant standard error
+        def constant_se
+          i=@fields.index :_constant
+          Math::sqrt(@var_cov_matrix[i,i])
+        end
+        # Regression coefficients
+        def coeffs
+          c=@coeffs.dup
+          c.delete("_constant")
+          c
+        end
+
+        def assign_names(c) # :nodoc:
+          a={}
+          @fields.each_index do |i|
+            a[@fields[i]]=c[i]
+          end
+          a
           end
-          a
-        end
       end # Base Engine
-    end # Dichotomic
+    end # Binomial
   end # Regression
 end # Stasample
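
The comments added above point users at the new fast-access helpers on Statsample::Regression instead of the removed Binomial.logit / Binomial.probit module methods. A minimal sketch, assuming the new helpers keep the (dataset, dependent-field) signature of the methods they replace; the toy data is illustrative:

  require 'statsample'

  ds = {
    'x' => [0, 1, 2, 3, 4, 5].to_vector(:scale),
    'y' => [0, 0, 1, 0, 1, 1].to_vector(:scale)
  }.to_dataset

  lr = Statsample::Regression.logit(ds, 'y')   # fast-access method named in the comment above
  lr.coeffs        # {'x' => ...} regression coefficients
  lr.constant      # intercept (the '_constant' column added by BaseEngine)
  lr.coeffs_se     # standard errors per field
  lr.log_likehood  # log-likelihood (library's spelling)
  lr.iterations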
@@ -110,7 +110,7 @@ module Statsample
       ds.each{|k,v|
         ds[k]=v.to_vector(:scale)
       }
-      lr=Multiple.listwise(ds.to_dataset,var)
+      lr=self.class.new(ds.to_dataset,var)
       1-lr.r2
     end
     # Tolerances for each coefficient