statsample 0.6.5 → 0.6.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +15 -0
- data/Manifest.txt +6 -0
- data/README.txt +30 -12
- data/Rakefile +91 -0
- data/demo/levene.rb +9 -0
- data/demo/multiple_regression.rb +1 -7
- data/demo/polychoric.rb +1 -0
- data/demo/principal_axis.rb +8 -0
- data/lib/distribution/f.rb +22 -22
- data/lib/spss.rb +99 -99
- data/lib/statsample/bivariate/polychoric.rb +32 -22
- data/lib/statsample/bivariate/tetrachoric.rb +212 -207
- data/lib/statsample/bivariate.rb +6 -6
- data/lib/statsample/codification.rb +65 -65
- data/lib/statsample/combination.rb +60 -59
- data/lib/statsample/converter/csv19.rb +12 -12
- data/lib/statsample/converters.rb +1 -1
- data/lib/statsample/dataset.rb +93 -36
- data/lib/statsample/dominanceanalysis/bootstrap.rb +66 -3
- data/lib/statsample/dominanceanalysis.rb +5 -6
- data/lib/statsample/factor/pca.rb +41 -11
- data/lib/statsample/factor/principalaxis.rb +105 -29
- data/lib/statsample/factor/rotation.rb +20 -3
- data/lib/statsample/factor.rb +1 -1
- data/lib/statsample/graph/gdchart.rb +13 -13
- data/lib/statsample/graph/svggraph.rb +166 -167
- data/lib/statsample/matrix.rb +22 -12
- data/lib/statsample/mle/logit.rb +3 -2
- data/lib/statsample/mle/probit.rb +7 -5
- data/lib/statsample/mle.rb +4 -2
- data/lib/statsample/multiset.rb +125 -124
- data/lib/statsample/permutation.rb +2 -1
- data/lib/statsample/regression/binomial/logit.rb +4 -3
- data/lib/statsample/regression/binomial/probit.rb +2 -1
- data/lib/statsample/regression/binomial.rb +62 -81
- data/lib/statsample/regression/multiple/baseengine.rb +1 -1
- data/lib/statsample/regression/multiple/gslengine.rb +1 -1
- data/lib/statsample/regression/multiple/matrixengine.rb +12 -6
- data/lib/statsample/regression/multiple.rb +15 -42
- data/lib/statsample/regression/simple.rb +93 -78
- data/lib/statsample/regression.rb +74 -2
- data/lib/statsample/reliability.rb +117 -120
- data/lib/statsample/srs.rb +156 -153
- data/lib/statsample/test/levene.rb +90 -0
- data/lib/statsample/test/umannwhitney.rb +25 -9
- data/lib/statsample/test.rb +2 -0
- data/lib/statsample/vector.rb +388 -413
- data/lib/statsample.rb +74 -30
- data/po/es/statsample.mo +0 -0
- data/test/test_bivariate.rb +5 -4
- data/test/test_combination.rb +1 -1
- data/test/test_dataset.rb +2 -2
- data/test/test_factor.rb +53 -6
- data/test/test_gsl.rb +1 -1
- data/test/test_mle.rb +1 -1
- data/test/test_regression.rb +18 -33
- data/test/test_statistics.rb +15 -33
- data/test/test_stest.rb +35 -0
- data/test/test_svg_graph.rb +2 -2
- data/test/test_vector.rb +331 -333
- metadata +38 -11
data/lib/statsample/matrix.rb
CHANGED
@@ -92,33 +92,43 @@ module GSL
|
|
92
92
|
end
|
93
93
|
|
94
94
|
module Statsample
|
95
|
-
#
|
95
|
+
# Module to add methods for variance/covariance and correlation matrices
|
96
|
+
# == Usage
|
97
|
+
# matrix=Matrix[[1,2],[2,3]]
|
98
|
+
# matrix.extend CovariateMatrix
|
99
|
+
#
|
96
100
|
module CovariateMatrix
|
101
|
+
# Gives a nice text summary of the matrix, built with ReportBuilder
|
97
102
|
def summary
|
98
103
|
rp=ReportBuilder.new()
|
99
104
|
rp.add(self)
|
100
105
|
rp.to_text
|
101
106
|
end
|
102
|
-
|
103
|
-
@type=v
|
104
|
-
end
|
107
|
+
# Get type of covariate matrix. Could be :covariance or :correlation
|
105
108
|
def type
|
106
|
-
if row_size
|
107
|
-
|
109
|
+
if row_size==column_size
|
110
|
+
if row_size.times.find {|i| self[i,i]!=1.0}
|
111
|
+
:covariance
|
112
|
+
else
|
113
|
+
:correlation
|
114
|
+
end
|
108
115
|
else
|
109
|
-
|
116
|
+
@type
|
110
117
|
end
|
111
118
|
|
112
119
|
end
|
120
|
+
def type=(t)
|
121
|
+
@type=t
|
122
|
+
end
|
113
123
|
def correlation
|
114
124
|
if(type==:covariance)
|
115
125
|
matrix=Matrix.rows(row_size.times.collect { |i|
|
116
126
|
column_size.times.collect { |j|
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
127
|
+
if i==j
|
128
|
+
1.0
|
129
|
+
else
|
130
|
+
self[i,j].quo(Math::sqrt(self[i,i])*Math::sqrt(self[j,j]))
|
131
|
+
end
|
122
132
|
}
|
123
133
|
})
|
124
134
|
matrix.extend CovariateMatrix
|
data/lib/statsample/mle/logit.rb
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
module Statsample
|
2
2
|
module MLE
|
3
|
-
# Logit MLE estimation.
|
3
|
+
# Logit MLE estimation.
|
4
|
+
# See Statsample::Regression for methods to generate a logit regression.
|
4
5
|
# Usage:
|
5
6
|
#
|
6
7
|
# mle=Statsample::MLE::Logit.new
|
7
8
|
# mle.newton_raphson(x,y)
|
8
9
|
# beta=mle.parameters
|
9
|
-
# likehood=mle.likehood(x,y,beta)
|
10
|
+
# likehood=mle.likehood(x, y, beta)
|
10
11
|
# iterations=mle.iterations
|
11
12
|
#
|
12
13
|
class Logit < BaseMLE
|
@@ -1,7 +1,9 @@
|
|
1
1
|
module Statsample
|
2
2
|
module MLE
|
3
3
|
# Probit MLE estimation.
|
4
|
-
#
|
4
|
+
# See Statsample::Regression for methods to generate a probit regression.
|
5
|
+
#
|
6
|
+
# == Usage:
|
5
7
|
#
|
6
8
|
# mle=Statsample::MLE::Probit.new
|
7
9
|
# mle.newton_raphson(x,y)
|
@@ -10,7 +12,7 @@ module Statsample
|
|
10
12
|
# iterations=mle.iterations
|
11
13
|
class Probit < BaseMLE
|
12
14
|
# F(B'Xi)
|
13
|
-
if
|
15
|
+
if Statsample.has_gsl?
|
14
16
|
# F(B'Xi)
|
15
17
|
def f(b,x)
|
16
18
|
p_bx=(x*b)[0,0]
|
@@ -67,7 +69,7 @@ module Statsample
|
|
67
69
|
raise "x.columns!=p.rows" if x.column_size!=b.row_size
|
68
70
|
n = x.row_size
|
69
71
|
k = x.column_size
|
70
|
-
if
|
72
|
+
if Statsample.has_gsl?
|
71
73
|
sum=GSL::Matrix.zeros(k)
|
72
74
|
else
|
73
75
|
sum=Matrix.zero(k)
|
@@ -76,12 +78,12 @@ module Statsample
|
|
76
78
|
xi=Matrix.rows([x.row(i).to_a])
|
77
79
|
fbx=f(b,xi)
|
78
80
|
val=((ff(b,xi)**2) / (fbx*(1.0-fbx)))*xi.t*xi
|
79
|
-
if
|
81
|
+
if Statsample.has_gsl?
|
80
82
|
val=val.to_gsl
|
81
83
|
end
|
82
84
|
sum-=val
|
83
85
|
end
|
84
|
-
if
|
86
|
+
if Statsample.has_gsl?
|
85
87
|
sum=sum.to_matrix
|
86
88
|
end
|
87
89
|
sum
|
data/lib/statsample/mle.rb
CHANGED
@@ -1,7 +1,9 @@
|
|
1
1
|
module Statsample
|
2
|
-
# Module for MLE calculations.
|
2
|
+
# Module for generic MLE calculations.
|
3
3
|
# Use subclass of BaseMLE for specific MLE model estimation.
|
4
|
-
#
|
4
|
+
# You should visit Statsample::Regression for methods to perform fast
|
5
|
+
# regression analysis.
|
6
|
+
# == Usage:
|
5
7
|
#
|
6
8
|
# mle=Statsample::MLE::Probit.new
|
7
9
|
# mle.newton_raphson(x,y)
|
data/lib/statsample/multiset.rb
CHANGED
@@ -24,53 +24,53 @@ module Statsample
|
|
24
24
|
@datasets.size
|
25
25
|
end
|
26
26
|
def add_dataset(key,ds)
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
27
|
+
if(ds.fields!=@fields)
|
28
|
+
raise ArgumentError, "Dataset(#{ds.fields.to_s})must have the same fields of the Multiset(#{@fields})"
|
29
|
+
else
|
30
|
+
@datasets[key]=ds
|
31
|
+
end
|
32
32
|
end
|
33
33
|
def sum_field(field)
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
34
|
+
@datasets.inject(0) {|a,da|
|
35
|
+
stratum_name=da[0]
|
36
|
+
vector=da[1][field]
|
37
|
+
val=yield stratum_name,vector
|
38
|
+
a+val
|
39
|
+
}
|
40
40
|
end
|
41
41
|
def collect_vector(field)
|
42
|
-
|
43
|
-
|
44
|
-
|
42
|
+
@datasets.collect {|k,v|
|
43
|
+
yield k, v[field]
|
44
|
+
}
|
45
45
|
end
|
46
46
|
def[](i)
|
47
|
-
|
48
|
-
end
|
47
|
+
@datasets[i]
|
49
48
|
end
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
49
|
+
end
|
50
|
+
class StratifiedSample
|
51
|
+
class << self
|
52
|
+
# mean for an array of vectors
|
53
|
+
def mean(*vectors)
|
54
|
+
n_total=0
|
55
|
+
means=vectors.inject(0){|a,v|
|
56
|
+
n_total+=v.size
|
57
|
+
a+v.sum
|
58
|
+
}
|
59
|
+
means.to_f/n_total
|
60
|
+
end
|
61
|
+
|
62
62
|
def standard_error_ksd_wr(es)
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
63
|
+
n_total=0
|
64
|
+
sum=es.inject(0){|a,h|
|
65
|
+
n_total+=h['N']
|
66
|
+
a+((h['N']**2 * h['s']**2) / h['n'].to_f)
|
67
|
+
}
|
68
|
+
(1.to_f / n_total)*Math::sqrt(sum)
|
69
69
|
end
|
70
70
|
|
71
71
|
|
72
72
|
def variance_ksd_wr(es)
|
73
|
-
|
73
|
+
standard_error_ksd_wr(es)**2
|
74
74
|
end
|
75
75
|
def calculate_n_total(es)
|
76
76
|
es.inject(0) {|a,h| a+h['N'] }
|
@@ -85,7 +85,7 @@ module Statsample
|
|
85
85
|
}
|
86
86
|
end
|
87
87
|
def standard_error_ksd_wor(es)
|
88
|
-
|
88
|
+
Math::sqrt(variance_ksd_wor(es))
|
89
89
|
end
|
90
90
|
|
91
91
|
|
@@ -101,26 +101,26 @@ module Statsample
|
|
101
101
|
|
102
102
|
|
103
103
|
def standard_error_esd_wor(es)
|
104
|
-
|
104
|
+
Math::sqrt(variance_ksd_wor(es))
|
105
105
|
end
|
106
106
|
# Based on http://stattrek.com/Lesson6/STRAnalysis.aspx
|
107
107
|
def variance_esd_wr(es)
|
108
108
|
n_total=calculate_n_total(es)
|
109
109
|
sum=es.inject(0){|a,h|
|
110
|
-
|
111
|
-
|
110
|
+
val= ((h['s']**2 * h['N']**2) / h['n'].to_f)
|
111
|
+
a+val
|
112
112
|
}
|
113
113
|
(1.0/(n_total**2))*sum
|
114
114
|
end
|
115
115
|
def standard_error_esd_wr(es)
|
116
|
-
|
116
|
+
Math::sqrt(variance_esd_wr(es))
|
117
117
|
end
|
118
118
|
|
119
119
|
def proportion_variance_ksd_wor(es)
|
120
120
|
n_total=calculate_n_total(es)
|
121
121
|
es.inject(0){|a,h|
|
122
|
-
|
123
|
-
|
122
|
+
val= (((h['N'].to_f / n_total)**2 * h['p']*(1-h['p'])) / (h['n'])) * (1- (h['n'].to_f / h['N']))
|
123
|
+
a+val
|
124
124
|
}
|
125
125
|
end
|
126
126
|
def proportion_sd_ksd_wor(es)
|
@@ -141,19 +141,20 @@ module Statsample
|
|
141
141
|
end
|
142
142
|
|
143
143
|
def proportion_variance_esd_wor(es)
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
144
|
+
n_total=n_total=calculate_n_total(es)
|
145
|
+
|
146
|
+
sum=es.inject(0){|a,h|
|
147
|
+
a=(h['N']**2 * (h['N']-h['n']) * h['p']*(1.0-h['p'])) / ((h['n']-1)*(h['N']-1))
|
148
|
+
a+val
|
149
|
+
}
|
150
|
+
Math::sqrt(sum) * (1.0/n_total**2)
|
151
151
|
end
|
152
152
|
def proportion_sd_esd_wor(es)
|
153
153
|
Math::sqrt(proportion_variance_ksd_wor(es))
|
154
154
|
end
|
155
|
-
|
156
|
-
|
155
|
+
end
|
156
|
+
|
157
|
+
def initialize(ms,strata_sizes)
|
157
158
|
raise TypeError,"ms should be a Multiset" unless ms.is_a? Statsample::Multiset
|
158
159
|
@ms=ms
|
159
160
|
raise ArgumentError,"You should put a strata size for each dataset" if strata_sizes.keys.sort!=ms.datasets_names
|
@@ -161,104 +162,104 @@ module Statsample
|
|
161
162
|
@population_size=@strata_sizes.inject(0) {|a,x| a+x[1]}
|
162
163
|
@strata_number=@ms.n_datasets
|
163
164
|
@sample_size=@ms.datasets.inject(0) {|a,x| a+x[1].cases}
|
164
|
-
|
165
|
-
|
166
|
-
|
165
|
+
end
|
166
|
+
# Number of strata
|
167
|
+
def strata_number
|
167
168
|
@strata_number
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
169
|
+
end
|
170
|
+
# Population size. Equal to sum of strata sizes
|
171
|
+
# Symbol: N
|
172
|
+
def population_size
|
172
173
|
@population_size
|
173
|
-
|
174
|
-
|
175
|
-
|
174
|
+
end
|
175
|
+
# Sample size. Equal to sum of sample of each stratum
|
176
|
+
def sample_size
|
176
177
|
@sample_size
|
177
|
-
|
178
|
-
|
179
|
-
|
178
|
+
end
|
179
|
+
# Size of stratum h
|
180
|
+
def stratum_size(h)
|
180
181
|
@strata_sizes[h]
|
181
|
-
|
182
|
-
|
182
|
+
end
|
183
|
+
def vectors_by_field(field)
|
183
184
|
@ms.datasets.collect{|k,ds|
|
184
|
-
|
185
|
+
ds[field]
|
185
186
|
}
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
187
|
+
end
|
188
|
+
# Population proportion based on strata
|
189
|
+
def proportion(field, v=1)
|
190
|
+
@ms.sum_field(field) {|s_name,vector|
|
191
|
+
stratum_ponderation(s_name)*vector.proportion(v)
|
192
|
+
}
|
193
|
+
end
|
194
|
+
# Stratum ponderation.
|
195
|
+
# Symbol: W\<sub>h\</sub>
|
196
|
+
def stratum_ponderation(h)
|
197
|
+
@strata_sizes[h].to_f / @population_size
|
198
|
+
end
|
199
|
+
alias_method :wh, :stratum_ponderation
|
200
|
+
|
201
|
+
# Population mean based on strata
|
202
|
+
def mean(field)
|
203
|
+
@ms.sum_field(field) {|s_name,vector|
|
204
|
+
stratum_ponderation(s_name)*vector.mean
|
205
|
+
}
|
206
|
+
end
|
207
|
+
# Standard error with estimated population variance and without replacement.
|
208
|
+
# Source: Cochran (1972)
|
209
|
+
def standard_error_wor(field)
|
209
210
|
es=@ms.collect_vector(field) {|s_n, vector|
|
210
|
-
|
211
|
+
{'N'=>@strata_sizes[s_n],'n'=>vector.size, 's'=>vector.sds}
|
211
212
|
}
|
212
213
|
|
213
214
|
StratifiedSample.standard_error_esd_wor(es)
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
215
|
+
end
|
216
|
+
|
217
|
+
# Standard error with estimated population variance and without replacement.
|
218
|
+
# Source: http://stattrek.com/Lesson6/STRAnalysis.aspx
|
219
|
+
|
220
|
+
def standard_error_wor_2(field)
|
221
|
+
sum=@ms.sum_field(field) {|s_name,vector|
|
222
|
+
s_size=@strata_sizes[s_name]
|
223
|
+
(s_size**2 * (1-(vector.size.to_f / s_size)) * vector.variance_sample / vector.size.to_f)
|
224
|
+
}
|
224
225
|
(1/@population_size.to_f)*Math::sqrt(sum)
|
225
|
-
|
226
|
-
|
227
|
-
|
226
|
+
end
|
227
|
+
|
228
|
+
def standard_error_wr(field)
|
228
229
|
es=@ms.collect_vector(field) {|s_n, vector|
|
229
|
-
|
230
|
+
{'N'=>@strata_sizes[s_n],'n'=>vector.size, 's'=>vector.sds}
|
230
231
|
}
|
231
232
|
|
232
233
|
StratifiedSample.standard_error_esd_wr(es)
|
233
|
-
|
234
|
-
|
234
|
+
end
|
235
|
+
def proportion_sd_esd_wor(field,v=1)
|
235
236
|
es=@ms.collect_vector(field) {|s_n, vector|
|
236
|
-
|
237
|
+
{'N'=>@strata_sizes[s_n],'n'=>vector.size, 'p'=>vector.proportion(v)}
|
237
238
|
}
|
238
239
|
|
239
240
|
StratifiedSample.proportion_sd_esd_wor(es)
|
240
|
-
|
241
|
-
|
242
|
-
|
241
|
+
end
|
242
|
+
|
243
|
+
def proportion_standard_error(field,v=1)
|
243
244
|
prop=proportion(field,v)
|
244
245
|
sum=@ms.sum_field(field) {|s_name,vector|
|
245
|
-
|
246
|
-
|
247
|
-
|
246
|
+
nh=vector.size
|
247
|
+
s_size=@strata_sizes[s_name]
|
248
|
+
(s_size**2 * (1-(nh/s_size)) * prop * (1-prop) / (nh -1 ))
|
248
249
|
}
|
249
250
|
(1.quo(@population_size)) * Math::sqrt(sum)
|
250
|
-
|
251
|
-
|
252
|
-
|
251
|
+
end
|
252
|
+
# Cochran(1971), p. 150
|
253
|
+
def variance_pst(field,v=1)
|
253
254
|
sum=@ms.datasets.inject(0) {|a,da|
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
255
|
+
stratum_name=da[0]
|
256
|
+
ds=da[1]
|
257
|
+
nh=ds.cases.to_f
|
258
|
+
s_size=@strata_sizes[stratum_name]
|
259
|
+
prop=ds[field].proportion(v)
|
260
|
+
a + (((s_size**2 * (s_size-nh)) / (s_size-1))*(prop*(1-prop) / (nh-1)))
|
260
261
|
}
|
261
262
|
(1/@population_size.to_f ** 2)*sum
|
262
|
-
|
263
|
+
end
|
263
264
|
end
|
264
265
|
end
|
@@ -14,7 +14,8 @@ module Statsample
|
|
14
14
|
# perm=Statsample::Permutation.new([0,0,1,1])
|
15
15
|
# => [[0,0,1,1],[0,1,0,1],[0,1,1,0],[1,0,0,1],[1,0,1,0],[1,1,0,0]]
|
16
16
|
#
|
17
|
-
# Reference:
|
17
|
+
# == Reference:
|
18
|
+
# * http://www.cut-the-knot.org/do_you_know/AllPerm.shtml
|
18
19
|
class Permutation
|
19
20
|
attr_reader :permutation_number
|
20
21
|
def initialize(v)
|
@@ -1,11 +1,12 @@
|
|
1
1
|
module Statsample
|
2
2
|
module Regression
|
3
3
|
module Binomial
|
4
|
-
# Logistic Regression
|
4
|
+
# Logistic Regression class.
|
5
|
+
# See Statsample::Regression::Binomial::BaseEngine for documentation
|
5
6
|
class Logit < BaseEngine
|
6
7
|
def initialize(ds,y_var)
|
7
|
-
|
8
|
-
|
8
|
+
model=Statsample::MLE::Logit.new
|
9
|
+
super(ds,y_var,model)
|
9
10
|
end
|
10
11
|
end
|
11
12
|
end
|
@@ -1,91 +1,72 @@
|
|
1
1
|
module Statsample
|
2
2
|
module Regression
|
3
3
|
module Binomial
|
4
|
-
# Create a Logit model object.
|
5
|
-
# ds:: Dataset
|
6
|
-
# y:: Name of dependent vector
|
7
|
-
# Use
|
8
|
-
# dataset=Statsample::CSV.read("data.csv")
|
9
|
-
# y="y"
|
10
|
-
# lr=Statsample::Regression::Binomial.logit(dataset,y)
|
11
|
-
#
|
12
|
-
def self.logit(ds,y_var)
|
13
|
-
Logit.new(ds,y_var)
|
14
|
-
end
|
15
|
-
# Create a Probit model object.
|
16
|
-
# ds:: Dataset
|
17
|
-
# y:: Name of dependent vector
|
18
|
-
# Use
|
19
|
-
# dataset=Statsample::CSV.read("data.csv")
|
20
|
-
# y="y"
|
21
|
-
# lr=Statsample::Regression::Binomial.probit(dataset,y)
|
22
|
-
#
|
23
|
-
|
24
|
-
def self.probit(ds,y_var)
|
25
|
-
Probit.new(ds,y_var)
|
26
|
-
end
|
27
4
|
# Base Engine for binomial regression analysis.
|
28
|
-
#
|
29
|
-
#
|
30
|
-
#
|
31
|
-
#
|
32
|
-
#
|
33
|
-
#
|
34
|
-
#
|
35
|
-
#
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
5
|
+
# Use Statsample::Regression.logit and Statsample::Regression.probit
|
6
|
+
# for fast access methods.
|
7
|
+
#
|
8
|
+
# == Usage:
|
9
|
+
# dataset=Statsample::CSV.read("data.csv")
|
10
|
+
# y="y"
|
11
|
+
# model=Statsample::MLE::Logit.new
|
12
|
+
# lr=Statsample::Regression::Binomial::BaseEngine.new(dataset, y, model)
|
13
|
+
class BaseEngine
|
14
|
+
attr_reader :log_likehood, :iterations
|
15
|
+
# Parameters
|
16
|
+
# * ds: Dataset
|
17
|
+
# * y_var: Name of dependent variable
|
18
|
+
# * model: Instance of a Statsample::MLE class (e.g. Statsample::MLE::Logit.new)
|
19
|
+
def initialize(ds,y_var,model)
|
20
|
+
@ds=ds
|
21
|
+
@y_var=y_var
|
22
|
+
@dy=@ds[@y_var]
|
23
|
+
@ds_indep=ds.dup(ds.fields-[y_var])
|
24
|
+
constant=([1.0]*ds.cases).to_vector(:scale)
|
25
|
+
@ds_indep.add_vector("_constant",constant)
|
26
|
+
mat_x=@ds_indep.to_matrix
|
27
|
+
mat_y=@dy.to_matrix(:vertical)
|
28
|
+
@fields=@ds_indep.fields
|
29
|
+
@model=model
|
30
|
+
coeffs=model.newton_raphson(mat_x, mat_y)
|
31
|
+
@coeffs=assign_names(coeffs.column(0).to_a)
|
32
|
+
@iterations=model.iterations
|
33
|
+
@var_cov_matrix=model.var_cov_matrix
|
34
|
+
@log_likehood=model.log_likehood(mat_x, mat_y, coeffs)
|
35
|
+
end # init
|
36
|
+
# Coefficients standard error
|
37
|
+
def coeffs_se
|
38
|
+
out={}
|
39
|
+
@fields.each_index{|i|
|
60
40
|
f=@fields[i]
|
61
41
|
out[f]=Math::sqrt(@var_cov_matrix[i,i])
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
42
|
+
}
|
43
|
+
out.delete("_constant")
|
44
|
+
out
|
45
|
+
end
|
46
|
+
# Value of constant on regression
|
47
|
+
def constant
|
48
|
+
@coeffs['_constant']
|
49
|
+
end
|
50
|
+
# Constant standard error
|
51
|
+
def constant_se
|
52
|
+
i=@fields.index :_constant
|
53
|
+
Math::sqrt(@var_cov_matrix[i,i])
|
54
|
+
end
|
55
|
+
# Regression coefficients
|
56
|
+
def coeffs
|
57
|
+
c=@coeffs.dup
|
58
|
+
c.delete("_constant")
|
59
|
+
c
|
60
|
+
end
|
61
|
+
|
62
|
+
def assign_names(c) # :nodoc:
|
63
|
+
a={}
|
64
|
+
@fields.each_index do |i|
|
65
|
+
a[@fields[i]]=c[i]
|
66
|
+
end
|
67
|
+
a
|
85
68
|
end
|
86
|
-
a
|
87
|
-
end
|
88
69
|
end # Base Engine
|
89
|
-
end #
|
70
|
+
end # Binomial
|
90
71
|
end # Regression
|
91
72
|
end # Statsample
|