statsample 0.6.3 → 0.6.4
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +6 -0
- data/Manifest.txt +4 -0
- data/README.txt +5 -5
- data/demo/dominance_analysis_bootstrap.rb +9 -3
- data/demo/dominanceanalysis.rb +23 -7
- data/demo/multivariate_correlation.rb +26 -0
- data/lib/statsample.rb +1 -1
- data/lib/statsample/bivariate.rb +24 -4
- data/lib/statsample/bivariate/polychoric.rb +15 -14
- data/lib/statsample/converters.rb +27 -23
- data/lib/statsample/crosstab.rb +1 -44
- data/lib/statsample/dominanceanalysis.rb +158 -64
- data/lib/statsample/dominanceanalysis/bootstrap.rb +16 -7
- data/lib/statsample/matrix.rb +145 -13
- data/lib/statsample/multiset.rb +248 -265
- data/lib/statsample/regression.rb +3 -0
- data/lib/statsample/regression/multiple.rb +65 -23
- data/lib/statsample/regression/multiple/baseengine.rb +19 -20
- data/lib/statsample/regression/multiple/matrixengine.rb +187 -0
- data/lib/statsample/regression/multiple/rubyengine.rb +58 -98
- data/test/test_bivariate.rb +1 -0
- data/test/test_crosstab.rb +0 -3
- data/test/test_dataset.rb +379 -379
- data/test/test_dominance_analysis.rb +43 -0
- data/test/test_matrix.rb +52 -0
- data/test/test_regression.rb +174 -129
- data/test/test_svg_graph.rb +51 -51
- metadata +29 -3
@@ -37,12 +37,20 @@ module Statsample
|
|
37
37
|
@ds=ds
|
38
38
|
@y_var=y_var
|
39
39
|
@n=ds.cases
|
40
|
-
|
41
|
-
@samples_ga=@fields.inject({}){|a,v| a[v]=[];a}
|
40
|
+
|
42
41
|
@n_samples=0
|
43
42
|
@alpha=ALPHA
|
44
43
|
@debug=false
|
45
|
-
|
44
|
+
if y_var.is_a? Array
|
45
|
+
@fields=ds.fields-y_var
|
46
|
+
@regression_class=Regression::Multiple::MultipleDependent
|
47
|
+
|
48
|
+
else
|
49
|
+
@fields=ds.fields-[y_var]
|
50
|
+
@regression_class=Regression::Multiple::MatrixEngine
|
51
|
+
end
|
52
|
+
@samples_ga=@fields.inject({}){|a,v| a[v]=[];a}
|
53
|
+
|
46
54
|
@name=_("Bootstrap dominance Analysis: %s over %s") % [ ds.fields.join(",") , @y_var]
|
47
55
|
opts.each{|k,v|
|
48
56
|
self.send("#{k}=",v) if self.respond_to? k
|
@@ -52,10 +60,10 @@ module Statsample
|
|
52
60
|
# lr_class deprecated
|
53
61
|
alias_method :lr_class, :regression_class
|
54
62
|
def da
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
63
|
+
if @da.nil?
|
64
|
+
@da=DominanceAnalysis.new(@ds,@y_var, :regression_class => @regression_class)
|
65
|
+
end
|
66
|
+
@da
|
59
67
|
end
|
60
68
|
# Creates n re-samples from original dataset and store result of
|
61
69
|
# each sample on @samples_td, @samples_cd, @samples_gd, @samples_ga
|
@@ -69,6 +77,7 @@ module Statsample
|
|
69
77
|
puts _("Bootstrap %d of %d") % [t+1, number_samples] if @debug
|
70
78
|
ds_boot=@ds.bootstrap(n)
|
71
79
|
da_1=DominanceAnalysis.new(ds_boot, @y_var, :regression_class => @regression_class)
|
80
|
+
|
72
81
|
da_1.total_dominance.each{|k,v|
|
73
82
|
@samples_td[k].push(v)
|
74
83
|
}
|
data/lib/statsample/matrix.rb
CHANGED
@@ -21,22 +21,62 @@ class ::Matrix
|
|
21
21
|
}
|
22
22
|
GSL::Matrix[*out]
|
23
23
|
end
|
24
|
-
|
24
|
+
|
25
25
|
# Calculate marginal of rows
|
26
|
-
def
|
26
|
+
def row_sum
|
27
27
|
(0...row_size).collect {|i|
|
28
28
|
row(i).to_a.inject(0) {|a,v| a+v}
|
29
29
|
}
|
30
30
|
end
|
31
31
|
# Calculate marginal of columns
|
32
|
-
def
|
32
|
+
def column_sum
|
33
33
|
(0...column_size).collect {|i|
|
34
34
|
column(i).to_a.inject(0) {|a,v| a+v}
|
35
35
|
}
|
36
36
|
end
|
37
|
+
|
38
|
+
|
39
|
+
alias :old_par :[]
|
40
|
+
|
41
|
+
# Select elements and submatrices
|
42
|
+
# Implement row, column and minor in one method
|
43
|
+
#
|
44
|
+
# * [i,j]:: Element i,j
|
45
|
+
# * [i,:*]:: Row i
|
46
|
+
# * [:*,j]:: Column j
|
47
|
+
# * [i1..i2,j]:: Rows i1 to i2, column j
|
48
|
+
|
49
|
+
def [](*args)
|
50
|
+
raise ArgumentError if args.size!=2
|
51
|
+
x=args[0]
|
52
|
+
y=args[1]
|
53
|
+
if x.is_a? Integer and y.is_a? Integer
|
54
|
+
@rows[args[0]][args[1]]
|
55
|
+
else
|
56
|
+
# set ranges according to arguments
|
57
|
+
|
58
|
+
rx=case x
|
59
|
+
when Numeric
|
60
|
+
x..x
|
61
|
+
when :*
|
62
|
+
0..(row_size-1)
|
63
|
+
when Range
|
64
|
+
x
|
65
|
+
end
|
66
|
+
ry=case y
|
67
|
+
when Numeric
|
68
|
+
y..y
|
69
|
+
when :*
|
70
|
+
0..(column_size-1)
|
71
|
+
when Range
|
72
|
+
y
|
73
|
+
end
|
74
|
+
Matrix.rows(rx.collect {|i| ry.collect {|j| @rows[i][j]}})
|
75
|
+
end
|
76
|
+
end
|
37
77
|
# Calculate sum of cells
|
38
78
|
def total_sum
|
39
|
-
|
79
|
+
row_sum.inject(0){|a,v| a+v}
|
40
80
|
end
|
41
81
|
end
|
42
82
|
|
@@ -52,26 +92,118 @@ module GSL
|
|
52
92
|
end
|
53
93
|
|
54
94
|
module Statsample
|
55
|
-
|
56
|
-
|
57
|
-
module CorrelationMatrix
|
95
|
+
# Methods for variance/covariance and correlation matrices
|
96
|
+
module CovariateMatrix
|
58
97
|
def summary
|
59
98
|
rp=ReportBuilder.new()
|
60
99
|
rp.add(self)
|
61
100
|
rp.to_text
|
62
101
|
end
|
63
|
-
def
|
64
|
-
@
|
102
|
+
def type=(v)
|
103
|
+
@type=v
|
104
|
+
end
|
105
|
+
def type
|
106
|
+
if row_size.times.find {|i| self[i,i]!=1.0}
|
107
|
+
:covariance
|
108
|
+
else
|
109
|
+
:correlation
|
110
|
+
end
|
111
|
+
|
112
|
+
end
|
113
|
+
def correlation
|
114
|
+
if(type==:covariance)
|
115
|
+
matrix=Matrix.rows(row_size.times.collect { |i|
|
116
|
+
column_size.times.collect { |j|
|
117
|
+
if i==j
|
118
|
+
1.0
|
119
|
+
else
|
120
|
+
self[i,j].quo(Math::sqrt(self[i,i])*Math::sqrt(self[j,j]))
|
121
|
+
end
|
122
|
+
}
|
123
|
+
})
|
124
|
+
matrix.extend CovariateMatrix
|
125
|
+
matrix.fields_x=fields_x
|
126
|
+
matrix.fields_y=fields_y
|
127
|
+
matrix.type=:correlation
|
128
|
+
matrix
|
129
|
+
else
|
130
|
+
self
|
131
|
+
end
|
132
|
+
end
|
133
|
+
def fields
|
134
|
+
raise "Should be square" if !square?
|
135
|
+
@fields_x
|
136
|
+
end
|
137
|
+
def fields=(v)
|
138
|
+
raise "Matrix should be square" if !square?
|
139
|
+
@fields_x=v
|
140
|
+
@fields_y=v
|
141
|
+
end
|
142
|
+
def fields_x=(v)
|
143
|
+
raise "Size of fields != row_size" if v.size!=row_size
|
144
|
+
@fields_x=v
|
145
|
+
end
|
146
|
+
def fields_y=(v)
|
147
|
+
raise "Size of fields != column_size" if v.size!=column_size
|
148
|
+
@fields_y=v
|
149
|
+
end
|
150
|
+
def fields_x
|
151
|
+
if @fields_x.nil?
|
152
|
+
@fields_x=row_size.times.collect {|i| i}
|
153
|
+
end
|
154
|
+
@fields_x
|
155
|
+
end
|
156
|
+
def fields_y
|
157
|
+
if @fields_y.nil?
|
158
|
+
@fields_y=column_size.times.collect {|i| i}
|
159
|
+
end
|
160
|
+
@fields_y
|
65
161
|
end
|
162
|
+
|
66
163
|
def name=(v)
|
67
164
|
@name=v
|
68
165
|
end
|
166
|
+
def name
|
167
|
+
@name
|
168
|
+
end
|
169
|
+
# Select a submatrix of factors. You could use labels or index to select
|
170
|
+
# the factors.
|
171
|
+
# If you don't specify columns, they will be equal to rows
|
172
|
+
# Example:
|
173
|
+
# a=Matrix[[1.0, 0.3, 0.2], [0.3, 1.0, 0.5], [0.2, 0.5, 1.0]]
|
174
|
+
# a.extend CovariateMatrix
|
175
|
+
# a.fields=%w{a b c}
|
176
|
+
# a.submatrix(%w{c a}, %w{b})
|
177
|
+
# => Matrix[[0.5],[0.3]]
|
178
|
+
# a.submatrix(%w{c a})
|
179
|
+
# => Matrix[[1.0, 0.2] , [0.2, 1.0]]
|
180
|
+
def submatrix(rows,columns=nil)
|
181
|
+
columns||=rows
|
182
|
+
# Convert all labels on index
|
183
|
+
row_index=rows.collect {|v|
|
184
|
+
v.is_a?(Numeric) ? v : fields_x.index(v)
|
185
|
+
}
|
186
|
+
column_index=columns.collect {|v|
|
187
|
+
v.is_a?(Numeric) ? v : fields_y.index(v)
|
188
|
+
}
|
189
|
+
|
190
|
+
|
191
|
+
fx=row_index.collect {|v| fields_x[v]}
|
192
|
+
fy=column_index.collect {|v| fields_y[v]}
|
193
|
+
|
194
|
+
matrix= Matrix.rows(row_index.collect {|i|
|
195
|
+
row=column_index.collect {|j| self[i,j]}})
|
196
|
+
matrix.extend CovariateMatrix
|
197
|
+
matrix.fields_x=fx
|
198
|
+
matrix.fields_y=fy
|
199
|
+
matrix.type=type
|
200
|
+
matrix
|
201
|
+
end
|
69
202
|
def to_reportbuilder(generator)
|
70
|
-
@name||="Correlation Matrix"
|
71
|
-
|
72
|
-
t=ReportBuilder::Table.new(:name=>@name, :header=>[""]+@labels)
|
203
|
+
@name||= (type==:correlation ? "Correlation":"Covariance")+" Matrix"
|
204
|
+
t=ReportBuilder::Table.new(:name=>@name, :header=>[""]+fields_y)
|
73
205
|
row_size.times {|i|
|
74
|
-
t.add_row([
|
206
|
+
t.add_row([fields_x[i]]+@rows[i].collect {|i1| sprintf("%0.3f",i1).gsub("0.",".")})
|
75
207
|
}
|
76
208
|
generator.parse_element(t)
|
77
209
|
end
|
data/lib/statsample/multiset.rb
CHANGED
@@ -1,54 +1,54 @@
|
|
1
1
|
module Statsample
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
end
|
32
|
-
end
|
33
|
-
def sum_field(field)
|
34
|
-
@datasets.inject(0) {|a,da|
|
35
|
-
stratum_name=da[0]
|
36
|
-
vector=da[1][field]
|
37
|
-
val=yield stratum_name,vector
|
38
|
-
a+val
|
39
|
-
}
|
40
|
-
end
|
41
|
-
def collect_vector(field)
|
42
|
-
@datasets.collect {|k,v|
|
43
|
-
yield k, v[field]
|
44
|
-
}
|
45
|
-
end
|
46
|
-
def[](i)
|
47
|
-
@datasets[i]
|
2
|
+
# Multiset joins multiple datasets with the same fields and vectors
|
3
|
+
# but with different number of cases.
|
4
|
+
# This is the base class for stratified and cluster sampling estimation
|
5
|
+
class Multiset
|
6
|
+
attr_reader :fields, :datasets
|
7
|
+
# To create a multiset
|
8
|
+
# * Multiset.new(%w{f1 f2 f3}) # define only fields
|
9
|
+
def initialize(fields)
|
10
|
+
@fields=fields
|
11
|
+
@datasets={}
|
12
|
+
end
|
13
|
+
def self.new_empty_vectors(fields,ds_names)
|
14
|
+
ms=Multiset.new(fields)
|
15
|
+
ds_names.each{|d|
|
16
|
+
ms.add_dataset(d,Dataset.new(fields))
|
17
|
+
}
|
18
|
+
ms
|
19
|
+
end
|
20
|
+
def datasets_names
|
21
|
+
@datasets.keys.sort
|
22
|
+
end
|
23
|
+
def n_datasets
|
24
|
+
@datasets.size
|
25
|
+
end
|
26
|
+
def add_dataset(key,ds)
|
27
|
+
if(ds.fields!=@fields)
|
28
|
+
raise ArgumentError, "Dataset(#{ds.fields.to_s})must have the same fields of the Multiset(#{@fields})"
|
29
|
+
else
|
30
|
+
@datasets[key]=ds
|
48
31
|
end
|
49
32
|
end
|
33
|
+
def sum_field(field)
|
34
|
+
@datasets.inject(0) {|a,da|
|
35
|
+
stratum_name=da[0]
|
36
|
+
vector=da[1][field]
|
37
|
+
val=yield stratum_name,vector
|
38
|
+
a+val
|
39
|
+
}
|
40
|
+
end
|
41
|
+
def collect_vector(field)
|
42
|
+
@datasets.collect {|k,v|
|
43
|
+
yield k, v[field]
|
44
|
+
}
|
45
|
+
end
|
46
|
+
def[](i)
|
47
|
+
@datasets[i]
|
48
|
+
end
|
49
|
+
end
|
50
50
|
class StratifiedSample
|
51
|
-
|
51
|
+
class << self
|
52
52
|
# mean for an array of vectors
|
53
53
|
def mean(*vectors)
|
54
54
|
n_total=0
|
@@ -59,223 +59,206 @@ module Statsample
|
|
59
59
|
means.to_f/n_total
|
60
60
|
end
|
61
61
|
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
def proportion_variance_esd_wor(es)
|
156
|
-
n_total=es.inject(0) {|a,h|
|
157
|
-
a+h['N']
|
158
|
-
}
|
159
|
-
|
160
|
-
sum=es.inject(0){|a,h|
|
161
|
-
a=(h['N']**2 * (h['N']-h['n']) * h['p']*(1.0-h['p'])) / ((h['n']-1)*(h['N']-1))
|
162
|
-
a+val
|
163
|
-
}
|
164
|
-
Math::sqrt(sum) * (1.0/n_total**2)
|
165
|
-
end
|
166
|
-
def proportion_sd_esd_wor(es)
|
167
|
-
Math::sqrt(proportion_variance_ksd_wor(es))
|
168
|
-
end
|
169
|
-
|
170
|
-
|
171
|
-
|
62
|
+
def standard_error_ksd_wr(es)
|
63
|
+
n_total=0
|
64
|
+
sum=es.inject(0){|a,h|
|
65
|
+
n_total+=h['N']
|
66
|
+
a+((h['N']**2 * h['s']**2) / h['n'].to_f)
|
67
|
+
}
|
68
|
+
(1.to_f / n_total)*Math::sqrt(sum)
|
69
|
+
end
|
70
|
+
|
71
|
+
|
72
|
+
def variance_ksd_wr(es)
|
73
|
+
standard_error_ksd_wr(es)**2
|
74
|
+
end
|
75
|
+
def calculate_n_total(es)
|
76
|
+
es.inject(0) {|a,h| a+h['N'] }
|
77
|
+
end
|
78
|
+
# Source : Cochran (1972)
|
79
|
+
|
80
|
+
def variance_ksd_wor(es)
|
81
|
+
n_total=calculate_n_total(es)
|
82
|
+
es.inject(0){|a,h|
|
83
|
+
val=((h['N'].to_f / n_total)**2) * (h['s']**2 / h['n'].to_f) * (1 - (h['n'].to_f / h['N']))
|
84
|
+
a+val
|
85
|
+
}
|
86
|
+
end
|
87
|
+
def standard_error_ksd_wor(es)
|
88
|
+
Math::sqrt(variance_ksd_wor(es))
|
89
|
+
end
|
90
|
+
|
91
|
+
|
92
|
+
|
93
|
+
def variance_esd_wor(es)
|
94
|
+
n_total=calculate_n_total(es)
|
95
|
+
sum=es.inject(0){|a,h|
|
96
|
+
val=h['N']*(h['N']-h['n'])*(h['s']**2 / h['n'].to_f)
|
97
|
+
a+val
|
98
|
+
}
|
99
|
+
(1.0/(n_total**2))*sum
|
100
|
+
end
|
101
|
+
|
102
|
+
|
103
|
+
def standard_error_esd_wor(es)
|
104
|
+
Math::sqrt(variance_ksd_wor(es))
|
105
|
+
end
|
106
|
+
# Based on http://stattrek.com/Lesson6/STRAnalysis.aspx
|
107
|
+
def variance_esd_wr(es)
|
108
|
+
n_total=calculate_n_total(es)
|
109
|
+
sum=es.inject(0){|a,h|
|
110
|
+
val= ((h['s']**2 * h['N']**2) / h['n'].to_f)
|
111
|
+
a+val
|
112
|
+
}
|
113
|
+
(1.0/(n_total**2))*sum
|
114
|
+
end
|
115
|
+
def standard_error_esd_wr(es)
|
116
|
+
Math::sqrt(variance_esd_wr(es))
|
117
|
+
end
|
118
|
+
|
119
|
+
def proportion_variance_ksd_wor(es)
|
120
|
+
n_total=calculate_n_total(es)
|
121
|
+
es.inject(0){|a,h|
|
122
|
+
val= (((h['N'].to_f / n_total)**2 * h['p']*(1-h['p'])) / (h['n'])) * (1- (h['n'].to_f / h['N']))
|
123
|
+
a+val
|
124
|
+
}
|
125
|
+
end
|
126
|
+
def proportion_sd_ksd_wor(es)
|
127
|
+
Math::sqrt(proportion_variance_ksd_wor(es))
|
128
|
+
end
|
129
|
+
|
130
|
+
|
131
|
+
def proportion_sd_ksd_wr(es)
|
132
|
+
n_total=calculate_n_total(es)
|
133
|
+
sum=es.inject(0){|a,h|
|
134
|
+
val= (h['N']**2 * h['p']*(1-h['p'])) / h['n'].to_f
|
135
|
+
a+val
|
136
|
+
}
|
137
|
+
Math::sqrt(sum) * (1.0/n_total)
|
138
|
+
end
|
139
|
+
def proportion_variance_ksd_wr(es)
|
140
|
+
proportion_variance_ksd_wor(es)**2
|
141
|
+
end
|
142
|
+
|
143
|
+
def proportion_variance_esd_wor(es)
|
144
|
+
n_total=n_total=calculate_n_total(es)
|
145
|
+
|
146
|
+
sum=es.inject(0){|a,h|
|
147
|
+
a=(h['N']**2 * (h['N']-h['n']) * h['p']*(1.0-h['p'])) / ((h['n']-1)*(h['N']-1))
|
148
|
+
a+val
|
149
|
+
}
|
150
|
+
Math::sqrt(sum) * (1.0/n_total**2)
|
151
|
+
end
|
152
|
+
def proportion_sd_esd_wor(es)
|
153
|
+
Math::sqrt(proportion_variance_ksd_wor(es))
|
154
|
+
end
|
172
155
|
end
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
156
|
+
def initialize(ms,strata_sizes)
|
157
|
+
raise TypeError,"ms should be a Multiset" unless ms.is_a? Statsample::Multiset
|
158
|
+
@ms=ms
|
159
|
+
raise ArgumentError,"You should put a strata size for each dataset" if strata_sizes.keys.sort!=ms.datasets_names
|
160
|
+
@strata_sizes=strata_sizes
|
161
|
+
@population_size=@strata_sizes.inject(0) {|a,x| a+x[1]}
|
162
|
+
@strata_number=@ms.n_datasets
|
163
|
+
@sample_size=@ms.datasets.inject(0) {|a,x| a+x[1].cases}
|
164
|
+
end
|
165
|
+
# Number of strata
|
166
|
+
def strata_number
|
167
|
+
@strata_number
|
168
|
+
end
|
169
|
+
# Population size. Equal to sum of strata sizes
|
170
|
+
# Symbol: N<sub>h</sub>
|
171
|
+
def population_size
|
172
|
+
@population_size
|
173
|
+
end
|
174
|
+
# Sample size. Equal to sum of sample of each stratum
|
175
|
+
def sample_size
|
176
|
+
@sample_size
|
177
|
+
end
|
178
|
+
# Size of stratum h
|
179
|
+
def stratum_size(h)
|
180
|
+
@strata_sizes[h]
|
181
|
+
end
|
182
|
+
def vectors_by_field(field)
|
183
|
+
@ms.datasets.collect{|k,ds|
|
184
|
+
ds[field]
|
185
|
+
}
|
186
|
+
end
|
187
|
+
# Population proportion based on strata
|
188
|
+
def proportion(field, v=1)
|
189
|
+
@ms.sum_field(field) {|s_name,vector|
|
190
|
+
stratum_ponderation(s_name)*vector.proportion(v)
|
191
|
+
}
|
192
|
+
end
|
193
|
+
# Stratum ponderation.
|
194
|
+
# Symbol: W\<sub>h\</sub>
|
195
|
+
def stratum_ponderation(h)
|
196
|
+
@strata_sizes[h].to_f / @population_size
|
197
|
+
end
|
198
|
+
alias_method :wh, :stratum_ponderation
|
199
|
+
|
200
|
+
# Population mean based on strata
|
201
|
+
def mean(field)
|
202
|
+
@ms.sum_field(field) {|s_name,vector|
|
203
|
+
stratum_ponderation(s_name)*vector.mean
|
204
|
+
}
|
205
|
+
end
|
206
|
+
# Standard error with estimated population variance and without replacement.
|
207
|
+
# Source: Cochran (1972)
|
208
|
+
def standard_error_wor(field)
|
209
|
+
es=@ms.collect_vector(field) {|s_n, vector|
|
210
|
+
{'N'=>@strata_sizes[s_n],'n'=>vector.size, 's'=>vector.sds}
|
211
|
+
}
|
212
|
+
|
213
|
+
StratifiedSample.standard_error_esd_wor(es)
|
214
|
+
end
|
215
|
+
|
216
|
+
# Standard error with estimated population variance and without replacement.
|
217
|
+
# Source: http://stattrek.com/Lesson6/STRAnalysis.aspx
|
218
|
+
|
219
|
+
def standard_error_wor_2(field)
|
220
|
+
sum=@ms.sum_field(field) {|s_name,vector|
|
221
|
+
s_size=@strata_sizes[s_name]
|
222
|
+
(s_size**2 * (1-(vector.size.to_f / s_size)) * vector.variance_sample / vector.size.to_f)
|
223
|
+
}
|
224
|
+
(1/@population_size.to_f)*Math::sqrt(sum)
|
225
|
+
end
|
226
|
+
|
227
|
+
def standard_error_wr(field)
|
228
|
+
es=@ms.collect_vector(field) {|s_n, vector|
|
229
|
+
{'N'=>@strata_sizes[s_n],'n'=>vector.size, 's'=>vector.sds}
|
230
|
+
}
|
231
|
+
|
232
|
+
StratifiedSample.standard_error_esd_wr(es)
|
233
|
+
end
|
234
|
+
def proportion_sd_esd_wor(field,v=1)
|
235
|
+
es=@ms.collect_vector(field) {|s_n, vector|
|
236
|
+
{'N'=>@strata_sizes[s_n],'n'=>vector.size, 'p'=>vector.proportion(v)}
|
237
|
+
}
|
238
|
+
|
239
|
+
StratifiedSample.proportion_sd_esd_wor(es)
|
240
|
+
end
|
241
|
+
|
242
|
+
def proportion_standard_error(field,v=1)
|
243
|
+
prop=proportion(field,v)
|
244
|
+
sum=@ms.sum_field(field) {|s_name,vector|
|
245
|
+
nh=vector.size
|
246
|
+
s_size=@strata_sizes[s_name]
|
247
|
+
(s_size**2 * (1-(nh/s_size)) * prop * (1-prop) / (nh -1 ))
|
248
|
+
}
|
249
|
+
(1.quo(@population_size)) * Math::sqrt(sum)
|
250
|
+
end
|
251
|
+
# Cochran(1971), p. 150
|
252
|
+
def variance_pst(field,v=1)
|
253
|
+
sum=@ms.datasets.inject(0) {|a,da|
|
254
|
+
stratum_name=da[0]
|
255
|
+
ds=da[1]
|
256
|
+
nh=ds.cases.to_f
|
257
|
+
s_size=@strata_sizes[stratum_name]
|
258
|
+
prop=ds[field].proportion(v)
|
259
|
+
a + (((s_size**2 * (s_size-nh)) / (s_size-1))*(prop*(1-prop) / (nh-1)))
|
260
|
+
}
|
261
|
+
(1/@population_size.to_f ** 2)*sum
|
262
|
+
end
|
263
|
+
end
|
281
264
|
end
|