statsample 0.6.3 → 0.6.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +6 -0
- data/Manifest.txt +4 -0
- data/README.txt +5 -5
- data/demo/dominance_analysis_bootstrap.rb +9 -3
- data/demo/dominanceanalysis.rb +23 -7
- data/demo/multivariate_correlation.rb +26 -0
- data/lib/statsample.rb +1 -1
- data/lib/statsample/bivariate.rb +24 -4
- data/lib/statsample/bivariate/polychoric.rb +15 -14
- data/lib/statsample/converters.rb +27 -23
- data/lib/statsample/crosstab.rb +1 -44
- data/lib/statsample/dominanceanalysis.rb +158 -64
- data/lib/statsample/dominanceanalysis/bootstrap.rb +16 -7
- data/lib/statsample/matrix.rb +145 -13
- data/lib/statsample/multiset.rb +248 -265
- data/lib/statsample/regression.rb +3 -0
- data/lib/statsample/regression/multiple.rb +65 -23
- data/lib/statsample/regression/multiple/baseengine.rb +19 -20
- data/lib/statsample/regression/multiple/matrixengine.rb +187 -0
- data/lib/statsample/regression/multiple/rubyengine.rb +58 -98
- data/test/test_bivariate.rb +1 -0
- data/test/test_crosstab.rb +0 -3
- data/test/test_dataset.rb +379 -379
- data/test/test_dominance_analysis.rb +43 -0
- data/test/test_matrix.rb +52 -0
- data/test/test_regression.rb +174 -129
- data/test/test_svg_graph.rb +51 -51
- metadata +29 -3
@@ -37,12 +37,20 @@ module Statsample
|
|
37
37
|
@ds=ds
|
38
38
|
@y_var=y_var
|
39
39
|
@n=ds.cases
|
40
|
-
|
41
|
-
@samples_ga=@fields.inject({}){|a,v| a[v]=[];a}
|
40
|
+
|
42
41
|
@n_samples=0
|
43
42
|
@alpha=ALPHA
|
44
43
|
@debug=false
|
45
|
-
|
44
|
+
if y_var.is_a? Array
|
45
|
+
@fields=ds.fields-y_var
|
46
|
+
@regression_class=Regression::Multiple::MultipleDependent
|
47
|
+
|
48
|
+
else
|
49
|
+
@fields=ds.fields-[y_var]
|
50
|
+
@regression_class=Regression::Multiple::MatrixEngine
|
51
|
+
end
|
52
|
+
@samples_ga=@fields.inject({}){|a,v| a[v]=[];a}
|
53
|
+
|
46
54
|
@name=_("Bootstrap dominance Analysis: %s over %s") % [ ds.fields.join(",") , @y_var]
|
47
55
|
opts.each{|k,v|
|
48
56
|
self.send("#{k}=",v) if self.respond_to? k
|
@@ -52,10 +60,10 @@ module Statsample
|
|
52
60
|
# lr_class deprecated
|
53
61
|
alias_method :lr_class, :regression_class
|
54
62
|
def da
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
63
|
+
if @da.nil?
|
64
|
+
@da=DominanceAnalysis.new(@ds,@y_var, :regression_class => @regression_class)
|
65
|
+
end
|
66
|
+
@da
|
59
67
|
end
|
60
68
|
# Creates n re-samples from the original dataset and stores the result of
|
61
69
|
# each sample on @samples_td, @samples_cd, @samples_gd, @samples_ga
|
@@ -69,6 +77,7 @@ module Statsample
|
|
69
77
|
puts _("Bootstrap %d of %d") % [t+1, number_samples] if @debug
|
70
78
|
ds_boot=@ds.bootstrap(n)
|
71
79
|
da_1=DominanceAnalysis.new(ds_boot, @y_var, :regression_class => @regression_class)
|
80
|
+
|
72
81
|
da_1.total_dominance.each{|k,v|
|
73
82
|
@samples_td[k].push(v)
|
74
83
|
}
|
data/lib/statsample/matrix.rb
CHANGED
@@ -21,22 +21,62 @@ class ::Matrix
|
|
21
21
|
}
|
22
22
|
GSL::Matrix[*out]
|
23
23
|
end
|
24
|
-
|
24
|
+
|
25
25
|
# Calculate marginal of rows
|
26
|
-
def
|
26
|
+
def row_sum
|
27
27
|
(0...row_size).collect {|i|
|
28
28
|
row(i).to_a.inject(0) {|a,v| a+v}
|
29
29
|
}
|
30
30
|
end
|
31
31
|
# Calculate marginal of columns
|
32
|
-
def
|
32
|
+
def column_sum
|
33
33
|
(0...column_size).collect {|i|
|
34
34
|
column(i).to_a.inject(0) {|a,v| a+v}
|
35
35
|
}
|
36
36
|
end
|
37
|
+
|
38
|
+
|
39
|
+
alias :old_par :[]
|
40
|
+
|
41
|
+
# Select elements and submatrices.
# Implements element access, row, column and minor in one method.
#
# * [i,j]:: Element i,j
# * [i,:*]:: Row i, returned as a Matrix with a single row
# * [:*,j]:: Column j, returned as a Matrix with a single column
# * [i1..i2,j]:: Rows i1 to i2 of column j
#
# Each selector may be an Integer, a Range of indexes or the symbol :*
# (every index on that axis). Two Integers return the stored element;
# any other combination builds a new Matrix with the selected cells.
#
# Raises ArgumentError unless exactly two selectors are given, or when a
# selector is not a number, a Range or :*.
def [](*args)
  raise ArgumentError if args.size!=2
  x, y = args
  # Plain element access: no Matrix is built
  return @rows[x][y] if x.is_a?(Integer) && y.is_a?(Integer)

  # Normalize one selector into the range of indexes it stands for
  as_range = lambda do |selector, size|
    case selector
    when Numeric
      selector..selector
    when :*
      0..(size-1)
    when Range
      selector
    else
      # Without this branch the range would be nil and the failure would
      # only show up later as a NoMethodError on nil
      raise ArgumentError, "Unsupported selector: #{selector.inspect}"
    end
  end

  rx = as_range.call(x, row_size)
  ry = as_range.call(y, column_size)
  Matrix.rows(rx.collect {|i| ry.collect {|j| @rows[i][j]}})
end
|
37
77
|
# Calculate sum of cells
|
38
78
|
def total_sum
|
39
|
-
|
79
|
+
row_sum.inject(0){|a,v| a+v}
|
40
80
|
end
|
41
81
|
end
|
42
82
|
|
@@ -52,26 +92,118 @@ module GSL
|
|
52
92
|
end
|
53
93
|
|
54
94
|
module Statsample
|
55
|
-
|
56
|
-
|
57
|
-
module CorrelationMatrix
|
95
|
+
# Methods for variance/covariance and correlation matrices
|
96
|
+
module CovariateMatrix
|
58
97
|
def summary
|
59
98
|
rp=ReportBuilder.new()
|
60
99
|
rp.add(self)
|
61
100
|
rp.to_text
|
62
101
|
end
|
63
|
-
def
|
64
|
-
@
|
102
|
+
# Sets the kind of matrix explicitly (the code in this module uses
# :covariance and :correlation).
def type=(v)
  @type=v
end
# Kind of matrix: :covariance or :correlation.
#
# A type assigned through #type= takes precedence; previously the assigned
# value was stored but never read. Without an explicit type, the matrix is
# reported as :correlation when every diagonal element equals 1.0 and as
# :covariance as soon as one diagonal element differs.
def type
  return @type unless @type.nil?
  unit_diagonal = row_size.times.all? {|i| self[i,i]==1.0}
  unit_diagonal ? :correlation : :covariance
end
|
113
|
+
def correlation
|
114
|
+
if(type==:covariance)
|
115
|
+
matrix=Matrix.rows(row_size.times.collect { |i|
|
116
|
+
column_size.times.collect { |j|
|
117
|
+
if i==j
|
118
|
+
1.0
|
119
|
+
else
|
120
|
+
self[i,j].quo(Math::sqrt(self[i,i])*Math::sqrt(self[j,j]))
|
121
|
+
end
|
122
|
+
}
|
123
|
+
})
|
124
|
+
matrix.extend CovariateMatrix
|
125
|
+
matrix.fields_x=fields_x
|
126
|
+
matrix.fields_y=fields_y
|
127
|
+
matrix.type=:correlation
|
128
|
+
matrix
|
129
|
+
else
|
130
|
+
self
|
131
|
+
end
|
132
|
+
end
|
133
|
+
# Field names of a square matrix, where row and column labels coincide.
# Goes through #fields_x so that the default labels are returned when no
# names were assigned, instead of nil.
#
# Raises RuntimeError when the matrix is not square.
def fields
  raise "Should be square" if !square?
  fields_x
end
|
137
|
+
def fields=(v)
|
138
|
+
raise "Matrix should be square" if !square?
|
139
|
+
@fields_x=v
|
140
|
+
@fields_y=v
|
141
|
+
end
|
142
|
+
def fields_x=(v)
|
143
|
+
raise "Size of fields != row_size" if v.size!=row_size
|
144
|
+
@fields_x=v
|
145
|
+
end
|
146
|
+
def fields_y=(v)
|
147
|
+
raise "Size of fields != column_size" if v.size!=column_size
|
148
|
+
@fields_y=v
|
149
|
+
end
|
150
|
+
def fields_x
|
151
|
+
if @fields_x.nil?
|
152
|
+
@fields_x=row_size.times.collect {|i| i}
|
153
|
+
end
|
154
|
+
@fields_x
|
155
|
+
end
|
156
|
+
def fields_y
|
157
|
+
if @fields_y.nil?
|
158
|
+
@fields_y=column_size.times.collect {|i| i}
|
159
|
+
end
|
160
|
+
@fields_y
|
65
161
|
end
|
162
|
+
|
66
163
|
def name=(v)
|
67
164
|
@name=v
|
68
165
|
end
|
166
|
+
def name
|
167
|
+
@name
|
168
|
+
end
|
169
|
+
# Select a submatrix of factors. You can use labels or indexes to select
# the factors.
# If you don't specify columns, they will be equal to rows.
# Example:
#   a=Matrix[[1.0, 0.3, 0.2], [0.3, 1.0, 0.5], [0.2, 0.5, 1.0]]
#   a.extend CovariateMatrix
#   a.fields=%w{a b c}
#   a.submatrix(%w{c a}, %w{b})
#   => Matrix[[0.5],[0.3]]
#   a.submatrix(%w{c a})
#   => Matrix[[1.0, 0.2] , [0.2, 1.0]]
#
# The result is extended with CovariateMatrix and carries the selected
# row/column labels and the type of the receiver.
#
# Raises ArgumentError when a label is not one of the fields.
def submatrix(rows,columns=nil)
  columns||=rows
  # Convert a label into its index; numeric selectors are used as given
  to_index = lambda do |selector, names, axis|
    next selector if selector.is_a?(Numeric)
    position = names.index(selector)
    raise ArgumentError, "Unknown #{axis} field: #{selector.inspect}" if position.nil?
    position
  end
  row_index=rows.collect {|v| to_index.call(v, fields_x, "row") }
  column_index=columns.collect {|v| to_index.call(v, fields_y, "column") }

  fx=row_index.collect {|v| fields_x[v]}
  fy=column_index.collect {|v| fields_y[v]}

  matrix=Matrix.rows(row_index.collect {|i|
    column_index.collect {|j| self[i,j]}
  })
  matrix.extend CovariateMatrix
  matrix.fields_x=fx
  matrix.fields_y=fy
  matrix.type=type
  matrix
end
|
69
202
|
# Adds this matrix to a report as a table: one header row with the column
# labels and one row per matrix row, prefixed by its row label.
#
# Cells are printed with three decimals and without the leading zero
# (0.25 => ".250", -0.3 => "-.300"). Only a zero directly before the
# decimal point at the start of the number is stripped: a blanket
# gsub("0.", ".") would also turn "10.500" into "1.500".
#
# generator:: object receiving the table through parse_element
def to_reportbuilder(generator)
  @name||= (type==:correlation ? "Correlation":"Covariance")+" Matrix"
  t=ReportBuilder::Table.new(:name=>@name, :header=>[""]+fields_y)
  row_size.times {|i|
    cells=@rows[i].collect {|value| sprintf("%0.3f",value).sub(/\A(-?)0\./, '\1.') }
    t.add_row([fields_x[i]]+cells)
  }
  generator.parse_element(t)
end
|
data/lib/statsample/multiset.rb
CHANGED
@@ -1,54 +1,54 @@
|
|
1
1
|
module Statsample
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
end
|
32
|
-
end
|
33
|
-
def sum_field(field)
|
34
|
-
@datasets.inject(0) {|a,da|
|
35
|
-
stratum_name=da[0]
|
36
|
-
vector=da[1][field]
|
37
|
-
val=yield stratum_name,vector
|
38
|
-
a+val
|
39
|
-
}
|
40
|
-
end
|
41
|
-
def collect_vector(field)
|
42
|
-
@datasets.collect {|k,v|
|
43
|
-
yield k, v[field]
|
44
|
-
}
|
45
|
-
end
|
46
|
-
def[](i)
|
47
|
-
@datasets[i]
|
2
|
+
# Multiset joins multiple datasets with the same fields and vectors
# but with different number of cases.
# This is the base class for stratified and cluster sampling estimation
class Multiset
  # fields:: field names every dataset must share
  # datasets:: Hash of datasets, indexed by the key given to add_dataset
  attr_reader :fields, :datasets
  # To create a multiset
  # * Multiset.new(%w{f1 f2 f3}) # define only fields
  def initialize(fields)
    @fields=fields
    @datasets={}
  end
  # Creates a multiset with one Dataset, built from +fields+, for each
  # name in +ds_names+.
  def self.new_empty_vectors(fields,ds_names)
    ms=Multiset.new(fields)
    ds_names.each{|d|
      ms.add_dataset(d,Dataset.new(fields))
    }
    ms
  end
  # Sorted keys of the stored datasets
  def datasets_names
    @datasets.keys.sort
  end
  # Number of stored datasets
  def n_datasets
    @datasets.size
  end
  # Stores +ds+ under +key+.
  # Raises ArgumentError when the fields of +ds+ differ from the fields
  # of the multiset.
  def add_dataset(key,ds)
    if ds.fields!=@fields
      raise ArgumentError, "Dataset (#{ds.fields.inspect}) must have the same fields as the Multiset (#{@fields.inspect})"
    end
    @datasets[key]=ds
  end
  # Sums, over every dataset, the value the block returns for
  # (dataset key, vector of +field+ on that dataset).
  def sum_field(field)
    @datasets.inject(0) {|a,(stratum_name,ds)|
      a + yield(stratum_name, ds[field])
    }
  end
  # Collects the value the block returns for
  # (dataset key, vector of +field+ on that dataset).
  def collect_vector(field)
    @datasets.collect {|k,v|
      yield k, v[field]
    }
  end
  # Dataset stored under key +i+
  def[](i)
    @datasets[i]
  end
end
|
50
50
|
class StratifiedSample
|
51
|
-
|
51
|
+
class << self
|
52
52
|
# mean for an array of vectors
|
53
53
|
def mean(*vectors)
|
54
54
|
n_total=0
|
@@ -59,223 +59,206 @@ module Statsample
|
|
59
59
|
means.to_f/n_total
|
60
60
|
end
|
61
61
|
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
def proportion_variance_esd_wor(es)
|
156
|
-
n_total=es.inject(0) {|a,h|
|
157
|
-
a+h['N']
|
158
|
-
}
|
159
|
-
|
160
|
-
sum=es.inject(0){|a,h|
|
161
|
-
a=(h['N']**2 * (h['N']-h['n']) * h['p']*(1.0-h['p'])) / ((h['n']-1)*(h['N']-1))
|
162
|
-
a+val
|
163
|
-
}
|
164
|
-
Math::sqrt(sum) * (1.0/n_total**2)
|
165
|
-
end
|
166
|
-
def proportion_sd_esd_wor(es)
|
167
|
-
Math::sqrt(proportion_variance_ksd_wor(es))
|
168
|
-
end
|
169
|
-
|
170
|
-
|
171
|
-
|
62
|
+
def standard_error_ksd_wr(es)
|
63
|
+
n_total=0
|
64
|
+
sum=es.inject(0){|a,h|
|
65
|
+
n_total+=h['N']
|
66
|
+
a+((h['N']**2 * h['s']**2) / h['n'].to_f)
|
67
|
+
}
|
68
|
+
(1.to_f / n_total)*Math::sqrt(sum)
|
69
|
+
end
|
70
|
+
|
71
|
+
|
72
|
+
def variance_ksd_wr(es)
|
73
|
+
standard_error_ksd_wr(es)**2
|
74
|
+
end
|
75
|
+
def calculate_n_total(es)
|
76
|
+
es.inject(0) {|a,h| a+h['N'] }
|
77
|
+
end
|
78
|
+
# Source : Cochran (1972)
|
79
|
+
|
80
|
+
def variance_ksd_wor(es)
|
81
|
+
n_total=calculate_n_total(es)
|
82
|
+
es.inject(0){|a,h|
|
83
|
+
val=((h['N'].to_f / n_total)**2) * (h['s']**2 / h['n'].to_f) * (1 - (h['n'].to_f / h['N']))
|
84
|
+
a+val
|
85
|
+
}
|
86
|
+
end
|
87
|
+
def standard_error_ksd_wor(es)
|
88
|
+
Math::sqrt(variance_ksd_wor(es))
|
89
|
+
end
|
90
|
+
|
91
|
+
|
92
|
+
|
93
|
+
def variance_esd_wor(es)
|
94
|
+
n_total=calculate_n_total(es)
|
95
|
+
sum=es.inject(0){|a,h|
|
96
|
+
val=h['N']*(h['N']-h['n'])*(h['s']**2 / h['n'].to_f)
|
97
|
+
a+val
|
98
|
+
}
|
99
|
+
(1.0/(n_total**2))*sum
|
100
|
+
end
|
101
|
+
|
102
|
+
|
103
|
+
def standard_error_esd_wor(es)
|
104
|
+
Math::sqrt(variance_ksd_wor(es))
|
105
|
+
end
|
106
|
+
# Based on http://stattrek.com/Lesson6/STRAnalysis.aspx
|
107
|
+
def variance_esd_wr(es)
|
108
|
+
n_total=calculate_n_total(es)
|
109
|
+
sum=es.inject(0){|a,h|
|
110
|
+
val= ((h['s']**2 * h['N']**2) / h['n'].to_f)
|
111
|
+
a+val
|
112
|
+
}
|
113
|
+
(1.0/(n_total**2))*sum
|
114
|
+
end
|
115
|
+
def standard_error_esd_wr(es)
|
116
|
+
Math::sqrt(variance_esd_wr(es))
|
117
|
+
end
|
118
|
+
|
119
|
+
def proportion_variance_ksd_wor(es)
|
120
|
+
n_total=calculate_n_total(es)
|
121
|
+
es.inject(0){|a,h|
|
122
|
+
val= (((h['N'].to_f / n_total)**2 * h['p']*(1-h['p'])) / (h['n'])) * (1- (h['n'].to_f / h['N']))
|
123
|
+
a+val
|
124
|
+
}
|
125
|
+
end
|
126
|
+
def proportion_sd_ksd_wor(es)
|
127
|
+
Math::sqrt(proportion_variance_ksd_wor(es))
|
128
|
+
end
|
129
|
+
|
130
|
+
|
131
|
+
def proportion_sd_ksd_wr(es)
|
132
|
+
n_total=calculate_n_total(es)
|
133
|
+
sum=es.inject(0){|a,h|
|
134
|
+
val= (h['N']**2 * h['p']*(1-h['p'])) / h['n'].to_f
|
135
|
+
a+val
|
136
|
+
}
|
137
|
+
Math::sqrt(sum) * (1.0/n_total)
|
138
|
+
end
|
139
|
+
# Variance of a stratified proportion, sampling with replacement.
#
# es:: Array of Hashes, one per stratum, with keys 'N' (stratum size),
#      'n' (sample size) and 'p' (sample proportion)
#
# Computes (1/N^2) * sum(N_h^2 * p_h * (1-p_h) / n_h), which is the square
# of the standard deviation computed by proportion_sd_ksd_wr. The previous
# body squared the *without replacement* variance instead.
def proportion_variance_ksd_wr(es)
  n_total=calculate_n_total(es)
  sum=es.inject(0){|a,h|
    a + (h['N']**2 * h['p']*(1-h['p'])) / h['n'].to_f
  }
  sum / (n_total**2).to_f
end
|
142
|
+
|
143
|
+
# Variance of a stratified proportion, sampling without replacement.
#
# es:: Array of Hashes, one per stratum, with keys 'N' (stratum size),
#      'n' (sample size) and 'p' (sample proportion)
#
# Computes
#   (1/N^2) * sum( N_h^2 * (N_h-n_h) * p_h*(1-p_h) / ((n_h-1)*(N_h-1)) )
#
# The previous body assigned each term to the accumulator and then added
# an undefined +val+, so every call raised NameError. It also took the
# square root of the sum, which does not yield a variance.
def proportion_variance_esd_wor(es)
  n_total=calculate_n_total(es)
  sum=es.inject(0){|a,h|
    val=(h['N']**2 * (h['N']-h['n']) * h['p']*(1.0-h['p'])) / ((h['n']-1)*(h['N']-1))
    a+val
  }
  sum * (1.0/n_total**2)
end
|
152
|
+
def proportion_sd_esd_wor(es)
|
153
|
+
Math::sqrt(proportion_variance_ksd_wor(es))
|
154
|
+
end
|
172
155
|
end
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
156
|
+
def initialize(ms,strata_sizes)
|
157
|
+
raise TypeError,"ms should be a Multiset" unless ms.is_a? Statsample::Multiset
|
158
|
+
@ms=ms
|
159
|
+
raise ArgumentError,"You should put a strata size for each dataset" if strata_sizes.keys.sort!=ms.datasets_names
|
160
|
+
@strata_sizes=strata_sizes
|
161
|
+
@population_size=@strata_sizes.inject(0) {|a,x| a+x[1]}
|
162
|
+
@strata_number=@ms.n_datasets
|
163
|
+
@sample_size=@ms.datasets.inject(0) {|a,x| a+x[1].cases}
|
164
|
+
end
|
165
|
+
# Number of strata
|
166
|
+
def strata_number
|
167
|
+
@strata_number
|
168
|
+
end
|
169
|
+
# Population size. Equal to sum of strata sizes
|
170
|
+
# Symbol: N
|
171
|
+
def population_size
|
172
|
+
@population_size
|
173
|
+
end
|
174
|
+
# Sample size. Equal to sum of sample of each stratum
|
175
|
+
def sample_size
|
176
|
+
@sample_size
|
177
|
+
end
|
178
|
+
# Size of stratum h
|
179
|
+
def stratum_size(h)
|
180
|
+
@strata_sizes[h]
|
181
|
+
end
|
182
|
+
def vectors_by_field(field)
|
183
|
+
@ms.datasets.collect{|k,ds|
|
184
|
+
ds[field]
|
185
|
+
}
|
186
|
+
end
|
187
|
+
# Population proportion based on strata
|
188
|
+
def proportion(field, v=1)
|
189
|
+
@ms.sum_field(field) {|s_name,vector|
|
190
|
+
stratum_ponderation(s_name)*vector.proportion(v)
|
191
|
+
}
|
192
|
+
end
|
193
|
+
# Stratum ponderation.
|
194
|
+
# Symbol: W\<sub>h\</sub>
|
195
|
+
def stratum_ponderation(h)
|
196
|
+
@strata_sizes[h].to_f / @population_size
|
197
|
+
end
|
198
|
+
alias_method :wh, :stratum_ponderation
|
199
|
+
|
200
|
+
# Population mean based on strata
|
201
|
+
def mean(field)
|
202
|
+
@ms.sum_field(field) {|s_name,vector|
|
203
|
+
stratum_ponderation(s_name)*vector.mean
|
204
|
+
}
|
205
|
+
end
|
206
|
+
# Standard error with estimated population variance and without replacement.
|
207
|
+
# Source: Cochran (1972)
|
208
|
+
def standard_error_wor(field)
|
209
|
+
es=@ms.collect_vector(field) {|s_n, vector|
|
210
|
+
{'N'=>@strata_sizes[s_n],'n'=>vector.size, 's'=>vector.sds}
|
211
|
+
}
|
212
|
+
|
213
|
+
StratifiedSample.standard_error_esd_wor(es)
|
214
|
+
end
|
215
|
+
|
216
|
+
# Standard error with estimated population variance and without replacement.
|
217
|
+
# Source: http://stattrek.com/Lesson6/STRAnalysis.aspx
|
218
|
+
|
219
|
+
def standard_error_wor_2(field)
|
220
|
+
sum=@ms.sum_field(field) {|s_name,vector|
|
221
|
+
s_size=@strata_sizes[s_name]
|
222
|
+
(s_size**2 * (1-(vector.size.to_f / s_size)) * vector.variance_sample / vector.size.to_f)
|
223
|
+
}
|
224
|
+
(1/@population_size.to_f)*Math::sqrt(sum)
|
225
|
+
end
|
226
|
+
|
227
|
+
def standard_error_wr(field)
|
228
|
+
es=@ms.collect_vector(field) {|s_n, vector|
|
229
|
+
{'N'=>@strata_sizes[s_n],'n'=>vector.size, 's'=>vector.sds}
|
230
|
+
}
|
231
|
+
|
232
|
+
StratifiedSample.standard_error_esd_wr(es)
|
233
|
+
end
|
234
|
+
def proportion_sd_esd_wor(field,v=1)
|
235
|
+
es=@ms.collect_vector(field) {|s_n, vector|
|
236
|
+
{'N'=>@strata_sizes[s_n],'n'=>vector.size, 'p'=>vector.proportion(v)}
|
237
|
+
}
|
238
|
+
|
239
|
+
StratifiedSample.proportion_sd_esd_wor(es)
|
240
|
+
end
|
241
|
+
|
242
|
+
# Standard error of the proportion of value +v+ on +field+.
#
# For each stratum adds
#   N_h^2 * (1 - n_h/N_h) * p*(1-p) / (n_h - 1)
# where p is the overall stratified proportion returned by #proportion,
# then returns sqrt(sum) / population size.
#
# The sampling fraction n_h/N_h is computed as a Float: with two Integers
# the division truncated to 0 and the finite population correction was
# silently dropped.
def proportion_standard_error(field,v=1)
  prop=proportion(field,v)
  sum=@ms.sum_field(field) {|s_name,vector|
    nh=vector.size
    s_size=@strata_sizes[s_name]
    (s_size**2 * (1-(nh.to_f / s_size)) * prop * (1-prop) / (nh -1 ))
  }
  (1.quo(@population_size)) * Math::sqrt(sum)
end
|
251
|
+
# Cochran(1971), p. 150
|
252
|
+
def variance_pst(field,v=1)
|
253
|
+
sum=@ms.datasets.inject(0) {|a,da|
|
254
|
+
stratum_name=da[0]
|
255
|
+
ds=da[1]
|
256
|
+
nh=ds.cases.to_f
|
257
|
+
s_size=@strata_sizes[stratum_name]
|
258
|
+
prop=ds[field].proportion(v)
|
259
|
+
a + (((s_size**2 * (s_size-nh)) / (s_size-1))*(prop*(1-prop) / (nh-1)))
|
260
|
+
}
|
261
|
+
(1/@population_size.to_f ** 2)*sum
|
262
|
+
end
|
263
|
+
end
|
281
264
|
end
|