statsample 0.6.5 → 0.6.7
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +15 -0
- data/Manifest.txt +6 -0
- data/README.txt +30 -12
- data/Rakefile +91 -0
- data/demo/levene.rb +9 -0
- data/demo/multiple_regression.rb +1 -7
- data/demo/polychoric.rb +1 -0
- data/demo/principal_axis.rb +8 -0
- data/lib/distribution/f.rb +22 -22
- data/lib/spss.rb +99 -99
- data/lib/statsample/bivariate/polychoric.rb +32 -22
- data/lib/statsample/bivariate/tetrachoric.rb +212 -207
- data/lib/statsample/bivariate.rb +6 -6
- data/lib/statsample/codification.rb +65 -65
- data/lib/statsample/combination.rb +60 -59
- data/lib/statsample/converter/csv19.rb +12 -12
- data/lib/statsample/converters.rb +1 -1
- data/lib/statsample/dataset.rb +93 -36
- data/lib/statsample/dominanceanalysis/bootstrap.rb +66 -3
- data/lib/statsample/dominanceanalysis.rb +5 -6
- data/lib/statsample/factor/pca.rb +41 -11
- data/lib/statsample/factor/principalaxis.rb +105 -29
- data/lib/statsample/factor/rotation.rb +20 -3
- data/lib/statsample/factor.rb +1 -1
- data/lib/statsample/graph/gdchart.rb +13 -13
- data/lib/statsample/graph/svggraph.rb +166 -167
- data/lib/statsample/matrix.rb +22 -12
- data/lib/statsample/mle/logit.rb +3 -2
- data/lib/statsample/mle/probit.rb +7 -5
- data/lib/statsample/mle.rb +4 -2
- data/lib/statsample/multiset.rb +125 -124
- data/lib/statsample/permutation.rb +2 -1
- data/lib/statsample/regression/binomial/logit.rb +4 -3
- data/lib/statsample/regression/binomial/probit.rb +2 -1
- data/lib/statsample/regression/binomial.rb +62 -81
- data/lib/statsample/regression/multiple/baseengine.rb +1 -1
- data/lib/statsample/regression/multiple/gslengine.rb +1 -1
- data/lib/statsample/regression/multiple/matrixengine.rb +12 -6
- data/lib/statsample/regression/multiple.rb +15 -42
- data/lib/statsample/regression/simple.rb +93 -78
- data/lib/statsample/regression.rb +74 -2
- data/lib/statsample/reliability.rb +117 -120
- data/lib/statsample/srs.rb +156 -153
- data/lib/statsample/test/levene.rb +90 -0
- data/lib/statsample/test/umannwhitney.rb +25 -9
- data/lib/statsample/test.rb +2 -0
- data/lib/statsample/vector.rb +388 -413
- data/lib/statsample.rb +74 -30
- data/po/es/statsample.mo +0 -0
- data/test/test_bivariate.rb +5 -4
- data/test/test_combination.rb +1 -1
- data/test/test_dataset.rb +2 -2
- data/test/test_factor.rb +53 -6
- data/test/test_gsl.rb +1 -1
- data/test/test_mle.rb +1 -1
- data/test/test_regression.rb +18 -33
- data/test/test_statistics.rb +15 -33
- data/test/test_stest.rb +35 -0
- data/test/test_svg_graph.rb +2 -2
- data/test/test_vector.rb +331 -333
- metadata +38 -11
data/lib/statsample/matrix.rb
CHANGED
@@ -92,33 +92,43 @@ module GSL
|
|
92
92
|
end
|
93
93
|
|
94
94
|
module Statsample
|
95
|
-
#
|
95
|
+
# Module that adds methods for variance/covariance and correlation matrices
|
96
|
+
# == Usage
|
97
|
+
# matrix=Matrix[[1,2],[2,3]]
|
98
|
+
# matrix.extend CovariateMatrix
|
99
|
+
#
|
96
100
|
module CovariateMatrix
|
101
|
+
# Gives a nice text summary of the matrix
|
97
102
|
def summary
|
98
103
|
rp=ReportBuilder.new()
|
99
104
|
rp.add(self)
|
100
105
|
rp.to_text
|
101
106
|
end
|
102
|
-
|
103
|
-
@type=v
|
104
|
-
end
|
107
|
+
# Get type of covariate matrix. Could be :covariance or :correlation
|
105
108
|
def type
|
106
|
-
if row_size
|
107
|
-
|
109
|
+
if row_size==column_size
|
110
|
+
if row_size.times.find {|i| self[i,i]!=1.0}
|
111
|
+
:covariance
|
112
|
+
else
|
113
|
+
:correlation
|
114
|
+
end
|
108
115
|
else
|
109
|
-
|
116
|
+
@type
|
110
117
|
end
|
111
118
|
|
112
119
|
end
|
120
|
+
def type=(t)
|
121
|
+
@type=t
|
122
|
+
end
|
113
123
|
def correlation
|
114
124
|
if(type==:covariance)
|
115
125
|
matrix=Matrix.rows(row_size.times.collect { |i|
|
116
126
|
column_size.times.collect { |j|
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
127
|
+
if i==j
|
128
|
+
1.0
|
129
|
+
else
|
130
|
+
self[i,j].quo(Math::sqrt(self[i,i])*Math::sqrt(self[j,j]))
|
131
|
+
end
|
122
132
|
}
|
123
133
|
})
|
124
134
|
matrix.extend CovariateMatrix
|
data/lib/statsample/mle/logit.rb
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
module Statsample
|
2
2
|
module MLE
|
3
|
-
# Logit MLE estimation.
|
3
|
+
# Logit MLE estimation.
|
4
|
+
# See Statsample::Regression for methods to generate a logit regression.
|
4
5
|
# Usage:
|
5
6
|
#
|
6
7
|
# mle=Statsample::MLE::Logit.new
|
7
8
|
# mle.newton_raphson(x,y)
|
8
9
|
# beta=mle.parameters
|
9
|
-
# likehood=mle.likehood(x,y,beta)
|
10
|
+
# likehood=mle.likehood(x, y, beta)
|
10
11
|
# iterations=mle.iterations
|
11
12
|
#
|
12
13
|
class Logit < BaseMLE
|
@@ -1,7 +1,9 @@
|
|
1
1
|
module Statsample
|
2
2
|
module MLE
|
3
3
|
# Probit MLE estimation.
|
4
|
-
#
|
4
|
+
# See Statsample::Regression for methods to generate a probit regression.
|
5
|
+
#
|
6
|
+
# == Usage:
|
5
7
|
#
|
6
8
|
# mle=Statsample::MLE::Probit.new
|
7
9
|
# mle.newton_raphson(x,y)
|
@@ -10,7 +12,7 @@ module Statsample
|
|
10
12
|
# iterations=mle.iterations
|
11
13
|
class Probit < BaseMLE
|
12
14
|
# F(B'Xi)
|
13
|
-
if
|
15
|
+
if Statsample.has_gsl?
|
14
16
|
# F(B'Xi)
|
15
17
|
def f(b,x)
|
16
18
|
p_bx=(x*b)[0,0]
|
@@ -67,7 +69,7 @@ module Statsample
|
|
67
69
|
raise "x.columns!=p.rows" if x.column_size!=b.row_size
|
68
70
|
n = x.row_size
|
69
71
|
k = x.column_size
|
70
|
-
if
|
72
|
+
if Statsample.has_gsl?
|
71
73
|
sum=GSL::Matrix.zeros(k)
|
72
74
|
else
|
73
75
|
sum=Matrix.zero(k)
|
@@ -76,12 +78,12 @@ module Statsample
|
|
76
78
|
xi=Matrix.rows([x.row(i).to_a])
|
77
79
|
fbx=f(b,xi)
|
78
80
|
val=((ff(b,xi)**2) / (fbx*(1.0-fbx)))*xi.t*xi
|
79
|
-
if
|
81
|
+
if Statsample.has_gsl?
|
80
82
|
val=val.to_gsl
|
81
83
|
end
|
82
84
|
sum-=val
|
83
85
|
end
|
84
|
-
if
|
86
|
+
if Statsample.has_gsl?
|
85
87
|
sum=sum.to_matrix
|
86
88
|
end
|
87
89
|
sum
|
data/lib/statsample/mle.rb
CHANGED
@@ -1,7 +1,9 @@
|
|
1
1
|
module Statsample
|
2
|
-
# Module for MLE calculations.
|
2
|
+
# Module for generic MLE calculations.
|
3
3
|
# Use subclass of BaseMLE for specific MLE model estimation.
|
4
|
-
#
|
4
|
+
# You should visit Statsample::Regression for methods to perform fast
|
5
|
+
# regression analysis.
|
6
|
+
# == Usage:
|
5
7
|
#
|
6
8
|
# mle=Statsample::MLE::Probit.new
|
7
9
|
# mle.newton_raphson(x,y)
|
data/lib/statsample/multiset.rb
CHANGED
@@ -24,53 +24,53 @@ module Statsample
|
|
24
24
|
@datasets.size
|
25
25
|
end
|
26
26
|
def add_dataset(key,ds)
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
27
|
+
if(ds.fields!=@fields)
|
28
|
+
raise ArgumentError, "Dataset(#{ds.fields.to_s})must have the same fields of the Multiset(#{@fields})"
|
29
|
+
else
|
30
|
+
@datasets[key]=ds
|
31
|
+
end
|
32
32
|
end
|
33
33
|
def sum_field(field)
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
34
|
+
@datasets.inject(0) {|a,da|
|
35
|
+
stratum_name=da[0]
|
36
|
+
vector=da[1][field]
|
37
|
+
val=yield stratum_name,vector
|
38
|
+
a+val
|
39
|
+
}
|
40
40
|
end
|
41
41
|
def collect_vector(field)
|
42
|
-
|
43
|
-
|
44
|
-
|
42
|
+
@datasets.collect {|k,v|
|
43
|
+
yield k, v[field]
|
44
|
+
}
|
45
45
|
end
|
46
46
|
def[](i)
|
47
|
-
|
48
|
-
end
|
47
|
+
@datasets[i]
|
49
48
|
end
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
49
|
+
end
|
50
|
+
class StratifiedSample
|
51
|
+
class << self
|
52
|
+
# mean for an array of vectors
|
53
|
+
def mean(*vectors)
|
54
|
+
n_total=0
|
55
|
+
means=vectors.inject(0){|a,v|
|
56
|
+
n_total+=v.size
|
57
|
+
a+v.sum
|
58
|
+
}
|
59
|
+
means.to_f/n_total
|
60
|
+
end
|
61
|
+
|
62
62
|
def standard_error_ksd_wr(es)
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
63
|
+
n_total=0
|
64
|
+
sum=es.inject(0){|a,h|
|
65
|
+
n_total+=h['N']
|
66
|
+
a+((h['N']**2 * h['s']**2) / h['n'].to_f)
|
67
|
+
}
|
68
|
+
(1.to_f / n_total)*Math::sqrt(sum)
|
69
69
|
end
|
70
70
|
|
71
71
|
|
72
72
|
def variance_ksd_wr(es)
|
73
|
-
|
73
|
+
standard_error_ksd_wr(es)**2
|
74
74
|
end
|
75
75
|
def calculate_n_total(es)
|
76
76
|
es.inject(0) {|a,h| a+h['N'] }
|
@@ -85,7 +85,7 @@ module Statsample
|
|
85
85
|
}
|
86
86
|
end
|
87
87
|
def standard_error_ksd_wor(es)
|
88
|
-
|
88
|
+
Math::sqrt(variance_ksd_wor(es))
|
89
89
|
end
|
90
90
|
|
91
91
|
|
@@ -101,26 +101,26 @@ module Statsample
|
|
101
101
|
|
102
102
|
|
103
103
|
def standard_error_esd_wor(es)
|
104
|
-
|
104
|
+
Math::sqrt(variance_ksd_wor(es))
|
105
105
|
end
|
106
106
|
# Based on http://stattrek.com/Lesson6/STRAnalysis.aspx
|
107
107
|
def variance_esd_wr(es)
|
108
108
|
n_total=calculate_n_total(es)
|
109
109
|
sum=es.inject(0){|a,h|
|
110
|
-
|
111
|
-
|
110
|
+
val= ((h['s']**2 * h['N']**2) / h['n'].to_f)
|
111
|
+
a+val
|
112
112
|
}
|
113
113
|
(1.0/(n_total**2))*sum
|
114
114
|
end
|
115
115
|
def standard_error_esd_wr(es)
|
116
|
-
|
116
|
+
Math::sqrt(variance_esd_wr(es))
|
117
117
|
end
|
118
118
|
|
119
119
|
def proportion_variance_ksd_wor(es)
|
120
120
|
n_total=calculate_n_total(es)
|
121
121
|
es.inject(0){|a,h|
|
122
|
-
|
123
|
-
|
122
|
+
val= (((h['N'].to_f / n_total)**2 * h['p']*(1-h['p'])) / (h['n'])) * (1- (h['n'].to_f / h['N']))
|
123
|
+
a+val
|
124
124
|
}
|
125
125
|
end
|
126
126
|
def proportion_sd_ksd_wor(es)
|
@@ -141,19 +141,20 @@ module Statsample
|
|
141
141
|
end
|
142
142
|
|
143
143
|
def proportion_variance_esd_wor(es)
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
144
|
+
n_total=n_total=calculate_n_total(es)
|
145
|
+
|
146
|
+
sum=es.inject(0){|a,h|
|
147
|
+
a=(h['N']**2 * (h['N']-h['n']) * h['p']*(1.0-h['p'])) / ((h['n']-1)*(h['N']-1))
|
148
|
+
a+val
|
149
|
+
}
|
150
|
+
Math::sqrt(sum) * (1.0/n_total**2)
|
151
151
|
end
|
152
152
|
def proportion_sd_esd_wor(es)
|
153
153
|
Math::sqrt(proportion_variance_ksd_wor(es))
|
154
154
|
end
|
155
|
-
|
156
|
-
|
155
|
+
end
|
156
|
+
|
157
|
+
def initialize(ms,strata_sizes)
|
157
158
|
raise TypeError,"ms should be a Multiset" unless ms.is_a? Statsample::Multiset
|
158
159
|
@ms=ms
|
159
160
|
raise ArgumentError,"You should put a strata size for each dataset" if strata_sizes.keys.sort!=ms.datasets_names
|
@@ -161,104 +162,104 @@ module Statsample
|
|
161
162
|
@population_size=@strata_sizes.inject(0) {|a,x| a+x[1]}
|
162
163
|
@strata_number=@ms.n_datasets
|
163
164
|
@sample_size=@ms.datasets.inject(0) {|a,x| a+x[1].cases}
|
164
|
-
|
165
|
-
|
166
|
-
|
165
|
+
end
|
166
|
+
# Number of strata
|
167
|
+
def strata_number
|
167
168
|
@strata_number
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
169
|
+
end
|
170
|
+
# Population size. Equal to sum of strata sizes
|
171
|
+
# Symbol: N
|
172
|
+
def population_size
|
172
173
|
@population_size
|
173
|
-
|
174
|
-
|
175
|
-
|
174
|
+
end
|
175
|
+
# Sample size. Equal to sum of sample of each stratum
|
176
|
+
def sample_size
|
176
177
|
@sample_size
|
177
|
-
|
178
|
-
|
179
|
-
|
178
|
+
end
|
179
|
+
# Size of stratum h
|
180
|
+
def stratum_size(h)
|
180
181
|
@strata_sizes[h]
|
181
|
-
|
182
|
-
|
182
|
+
end
|
183
|
+
def vectors_by_field(field)
|
183
184
|
@ms.datasets.collect{|k,ds|
|
184
|
-
|
185
|
+
ds[field]
|
185
186
|
}
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
187
|
+
end
|
188
|
+
# Population proportion based on strata
|
189
|
+
def proportion(field, v=1)
|
190
|
+
@ms.sum_field(field) {|s_name,vector|
|
191
|
+
stratum_ponderation(s_name)*vector.proportion(v)
|
192
|
+
}
|
193
|
+
end
|
194
|
+
# Stratum ponderation.
|
195
|
+
# Symbol: W\<sub>h\</sub>
|
196
|
+
def stratum_ponderation(h)
|
197
|
+
@strata_sizes[h].to_f / @population_size
|
198
|
+
end
|
199
|
+
alias_method :wh, :stratum_ponderation
|
200
|
+
|
201
|
+
# Population mean based on strata
|
202
|
+
def mean(field)
|
203
|
+
@ms.sum_field(field) {|s_name,vector|
|
204
|
+
stratum_ponderation(s_name)*vector.mean
|
205
|
+
}
|
206
|
+
end
|
207
|
+
# Standard error with estimated population variance and without replacement.
|
208
|
+
# Source: Cochran (1972)
|
209
|
+
def standard_error_wor(field)
|
209
210
|
es=@ms.collect_vector(field) {|s_n, vector|
|
210
|
-
|
211
|
+
{'N'=>@strata_sizes[s_n],'n'=>vector.size, 's'=>vector.sds}
|
211
212
|
}
|
212
213
|
|
213
214
|
StratifiedSample.standard_error_esd_wor(es)
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
215
|
+
end
|
216
|
+
|
217
|
+
# Standard error with estimated population variance and without replacement.
|
218
|
+
# Source: http://stattrek.com/Lesson6/STRAnalysis.aspx
|
219
|
+
|
220
|
+
def standard_error_wor_2(field)
|
221
|
+
sum=@ms.sum_field(field) {|s_name,vector|
|
222
|
+
s_size=@strata_sizes[s_name]
|
223
|
+
(s_size**2 * (1-(vector.size.to_f / s_size)) * vector.variance_sample / vector.size.to_f)
|
224
|
+
}
|
224
225
|
(1/@population_size.to_f)*Math::sqrt(sum)
|
225
|
-
|
226
|
-
|
227
|
-
|
226
|
+
end
|
227
|
+
|
228
|
+
def standard_error_wr(field)
|
228
229
|
es=@ms.collect_vector(field) {|s_n, vector|
|
229
|
-
|
230
|
+
{'N'=>@strata_sizes[s_n],'n'=>vector.size, 's'=>vector.sds}
|
230
231
|
}
|
231
232
|
|
232
233
|
StratifiedSample.standard_error_esd_wr(es)
|
233
|
-
|
234
|
-
|
234
|
+
end
|
235
|
+
def proportion_sd_esd_wor(field,v=1)
|
235
236
|
es=@ms.collect_vector(field) {|s_n, vector|
|
236
|
-
|
237
|
+
{'N'=>@strata_sizes[s_n],'n'=>vector.size, 'p'=>vector.proportion(v)}
|
237
238
|
}
|
238
239
|
|
239
240
|
StratifiedSample.proportion_sd_esd_wor(es)
|
240
|
-
|
241
|
-
|
242
|
-
|
241
|
+
end
|
242
|
+
|
243
|
+
def proportion_standard_error(field,v=1)
|
243
244
|
prop=proportion(field,v)
|
244
245
|
sum=@ms.sum_field(field) {|s_name,vector|
|
245
|
-
|
246
|
-
|
247
|
-
|
246
|
+
nh=vector.size
|
247
|
+
s_size=@strata_sizes[s_name]
|
248
|
+
(s_size**2 * (1-(nh/s_size)) * prop * (1-prop) / (nh -1 ))
|
248
249
|
}
|
249
250
|
(1.quo(@population_size)) * Math::sqrt(sum)
|
250
|
-
|
251
|
-
|
252
|
-
|
251
|
+
end
|
252
|
+
# Cochran(1971), p. 150
|
253
|
+
def variance_pst(field,v=1)
|
253
254
|
sum=@ms.datasets.inject(0) {|a,da|
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
255
|
+
stratum_name=da[0]
|
256
|
+
ds=da[1]
|
257
|
+
nh=ds.cases.to_f
|
258
|
+
s_size=@strata_sizes[stratum_name]
|
259
|
+
prop=ds[field].proportion(v)
|
260
|
+
a + (((s_size**2 * (s_size-nh)) / (s_size-1))*(prop*(1-prop) / (nh-1)))
|
260
261
|
}
|
261
262
|
(1/@population_size.to_f ** 2)*sum
|
262
|
-
|
263
|
+
end
|
263
264
|
end
|
264
265
|
end
|
@@ -14,7 +14,8 @@ module Statsample
|
|
14
14
|
# perm=Statsample::Permutation.new([0,0,1,1])
|
15
15
|
# => [[0,0,1,1],[0,1,0,1],[0,1,1,0],[1,0,0,1],[1,0,1,0],[1,1,0,0]]
|
16
16
|
#
|
17
|
-
# Reference:
|
17
|
+
# == Reference:
|
18
|
+
# * http://www.cut-the-knot.org/do_you_know/AllPerm.shtml
|
18
19
|
class Permutation
|
19
20
|
attr_reader :permutation_number
|
20
21
|
def initialize(v)
|
@@ -1,11 +1,12 @@
|
|
1
1
|
module Statsample
|
2
2
|
module Regression
|
3
3
|
module Binomial
|
4
|
-
# Logistic Regression
|
4
|
+
# Logistic Regression class.
|
5
|
+
# See Statsample::Regression::Binomial::BaseEngine for documentation
|
5
6
|
class Logit < BaseEngine
|
6
7
|
def initialize(ds,y_var)
|
7
|
-
|
8
|
-
|
8
|
+
model=Statsample::MLE::Logit.new
|
9
|
+
super(ds,y_var,model)
|
9
10
|
end
|
10
11
|
end
|
11
12
|
end
|
@@ -1,91 +1,72 @@
|
|
1
1
|
module Statsample
|
2
2
|
module Regression
|
3
3
|
module Binomial
|
4
|
-
# Create a Logit model object.
|
5
|
-
# ds:: Dataset
|
6
|
-
# y:: Name of dependent vector
|
7
|
-
# Use
|
8
|
-
# dataset=Statsample::CSV.read("data.csv")
|
9
|
-
# y="y"
|
10
|
-
# lr=Statsample::Regression::Binomial.logit(dataset,y)
|
11
|
-
#
|
12
|
-
def self.logit(ds,y_var)
|
13
|
-
Logit.new(ds,y_var)
|
14
|
-
end
|
15
|
-
# Create a Probit model object.
|
16
|
-
# ds:: Dataset
|
17
|
-
# y:: Name of dependent vector
|
18
|
-
# Use
|
19
|
-
# dataset=Statsample::CSV.read("data.csv")
|
20
|
-
# y="y"
|
21
|
-
# lr=Statsample::Regression::Binomial.probit(dataset,y)
|
22
|
-
#
|
23
|
-
|
24
|
-
def self.probit(ds,y_var)
|
25
|
-
Probit.new(ds,y_var)
|
26
|
-
end
|
27
4
|
# Base Engine for binomial regression analysis.
|
28
|
-
#
|
29
|
-
#
|
30
|
-
#
|
31
|
-
#
|
32
|
-
#
|
33
|
-
#
|
34
|
-
#
|
35
|
-
#
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
5
|
+
# Use Statsample::Regression.logit and Statsample::Regression.probit
|
6
|
+
# for fast access methods.
|
7
|
+
#
|
8
|
+
# == Usage:
|
9
|
+
# dataset=Statsample::CSV.read("data.csv")
|
10
|
+
# y="y"
|
11
|
+
# model=Statsample::MLE::Logit.new
|
12
|
+
# lr=Statsample::Regression::Binomial::BaseEngine.new(dataset, y, model)
|
13
|
+
class BaseEngine
|
14
|
+
attr_reader :log_likehood, :iterations
|
15
|
+
# Parameters
|
16
|
+
# * ds: Dataset
|
17
|
+
# * y_var: Name of dependent variable
|
18
|
+
# * model: Instance of a Statsample::MLE model class (e.g. Statsample::MLE::Logit.new)
|
19
|
+
def initialize(ds,y_var,model)
|
20
|
+
@ds=ds
|
21
|
+
@y_var=y_var
|
22
|
+
@dy=@ds[@y_var]
|
23
|
+
@ds_indep=ds.dup(ds.fields-[y_var])
|
24
|
+
constant=([1.0]*ds.cases).to_vector(:scale)
|
25
|
+
@ds_indep.add_vector("_constant",constant)
|
26
|
+
mat_x=@ds_indep.to_matrix
|
27
|
+
mat_y=@dy.to_matrix(:vertical)
|
28
|
+
@fields=@ds_indep.fields
|
29
|
+
@model=model
|
30
|
+
coeffs=model.newton_raphson(mat_x, mat_y)
|
31
|
+
@coeffs=assign_names(coeffs.column(0).to_a)
|
32
|
+
@iterations=model.iterations
|
33
|
+
@var_cov_matrix=model.var_cov_matrix
|
34
|
+
@log_likehood=model.log_likehood(mat_x, mat_y, coeffs)
|
35
|
+
end # init
|
36
|
+
# Coefficients standard error
|
37
|
+
def coeffs_se
|
38
|
+
out={}
|
39
|
+
@fields.each_index{|i|
|
60
40
|
f=@fields[i]
|
61
41
|
out[f]=Math::sqrt(@var_cov_matrix[i,i])
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
42
|
+
}
|
43
|
+
out.delete("_constant")
|
44
|
+
out
|
45
|
+
end
|
46
|
+
# Value of constant on regression
|
47
|
+
def constant
|
48
|
+
@coeffs['_constant']
|
49
|
+
end
|
50
|
+
# Constant standard error
|
51
|
+
def constant_se
|
52
|
+
i=@fields.index :_constant
|
53
|
+
Math::sqrt(@var_cov_matrix[i,i])
|
54
|
+
end
|
55
|
+
# Regression coefficients
|
56
|
+
def coeffs
|
57
|
+
c=@coeffs.dup
|
58
|
+
c.delete("_constant")
|
59
|
+
c
|
60
|
+
end
|
61
|
+
|
62
|
+
def assign_names(c) # :nodoc:
|
63
|
+
a={}
|
64
|
+
@fields.each_index do |i|
|
65
|
+
a[@fields[i]]=c[i]
|
66
|
+
end
|
67
|
+
a
|
85
68
|
end
|
86
|
-
a
|
87
|
-
end
|
88
69
|
end # Base Engine
|
89
|
-
end #
|
70
|
+
end # Binomial
|
90
71
|
end # Regression
|
91
72
|
end # Statsample
|