statsample 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +79 -0
- data/Manifest.txt +56 -0
- data/README.txt +77 -0
- data/Rakefile +22 -0
- data/bin/statsample +2 -0
- data/demo/benchmark.rb +52 -0
- data/demo/chi-square.rb +44 -0
- data/demo/dice.rb +13 -0
- data/demo/distribution_t.rb +95 -0
- data/demo/graph.rb +9 -0
- data/demo/item_analysis.rb +30 -0
- data/demo/mean.rb +81 -0
- data/demo/proportion.rb +57 -0
- data/demo/sample_test.csv +113 -0
- data/demo/strata_proportion.rb +152 -0
- data/demo/stratum.rb +141 -0
- data/lib/spss.rb +131 -0
- data/lib/statsample.rb +216 -0
- data/lib/statsample/anova.rb +74 -0
- data/lib/statsample/bivariate.rb +255 -0
- data/lib/statsample/chidistribution.rb +39 -0
- data/lib/statsample/codification.rb +120 -0
- data/lib/statsample/converters.rb +338 -0
- data/lib/statsample/crosstab.rb +122 -0
- data/lib/statsample/dataset.rb +526 -0
- data/lib/statsample/dominanceanalysis.rb +259 -0
- data/lib/statsample/dominanceanalysis/bootstrap.rb +126 -0
- data/lib/statsample/graph/gdchart.rb +45 -0
- data/lib/statsample/graph/svgboxplot.rb +108 -0
- data/lib/statsample/graph/svggraph.rb +181 -0
- data/lib/statsample/graph/svghistogram.rb +208 -0
- data/lib/statsample/graph/svgscatterplot.rb +111 -0
- data/lib/statsample/htmlreport.rb +232 -0
- data/lib/statsample/multiset.rb +281 -0
- data/lib/statsample/regression.rb +522 -0
- data/lib/statsample/reliability.rb +235 -0
- data/lib/statsample/resample.rb +20 -0
- data/lib/statsample/srs.rb +159 -0
- data/lib/statsample/test.rb +25 -0
- data/lib/statsample/vector.rb +759 -0
- data/test/_test_chart.rb +58 -0
- data/test/test_anova.rb +31 -0
- data/test/test_codification.rb +59 -0
- data/test/test_crosstab.rb +55 -0
- data/test/test_csv.csv +7 -0
- data/test/test_csv.rb +27 -0
- data/test/test_dataset.rb +293 -0
- data/test/test_ggobi.rb +42 -0
- data/test/test_multiset.rb +98 -0
- data/test/test_regression.rb +108 -0
- data/test/test_reliability.rb +32 -0
- data/test/test_resample.rb +23 -0
- data/test/test_srs.rb +14 -0
- data/test/test_statistics.rb +152 -0
- data/test/test_stratified.rb +19 -0
- data/test/test_svg_graph.rb +63 -0
- data/test/test_vector.rb +265 -0
- data/test/test_xls.rb +32 -0
- metadata +158 -0
@@ -0,0 +1,232 @@
|
|
1
|
+
require 'statsample/graph/svggraph'
|
2
|
+
|
3
|
+
module Statsample
|
4
|
+
# Assembles an HTML report out of ordered fragments ("partials") plus an
# index of named anchors. Graphs are written as SVG files inside the report
# directory and referenced from the HTML through <embed> tags.
class HtmlReport
  # name:: report title; also used to build the default directory ("<name>/")
  # dir::  directory that receives the generated SVG files. It is created
  #        when it does not exist yet.
  def initialize(name,dir=nil)
    require 'fileutils'
    @uniq=1        # id given to the next anchor
    @uniq_file=0   # counter consumed by #uniq_file
    @name=name
    @partials=[]
    @anchors=[]
    @dir=dir || @name+"/"
    @level=1       # nesting depth recorded with every new anchor
    # File.exists? was removed in Ruby 3.2; File.exist? is available everywhere.
    FileUtils.mkdir(@dir) unless File.exist?(@dir)
  end

  # Registers an anchor called +name+ and appends +summary+ (an HTML string)
  # as the next fragment of the report.
  def add_summary(name,summary)
    add_anchor(name)
    @partials.push(summary)
  end

  # Records an index entry [name, level, id] and appends the matching
  # <a name='id'> target to the report body.
  def add_anchor(name)
    @anchors.push([name,@level,@uniq])
    @partials.push("<a name='#{@uniq}'> </a>")
    @uniq+=1
  end

  # Returns a file base name made of +prepend+, a per-report counter and the
  # current Unix time.
  def uniq_file(prepend="file")
    @uniq_file+=1
    "#{prepend}_#{@uniq_file}_#{Time.now.to_i}"
  end

  # Appends a table with the correlation matrix of +ds+. Every cell shows the
  # coefficient, a significance mark ("**" for p<0.01, "*" for p<0.05) and
  # the probability; cells with a mark are rendered in bold.
  def add_correlation_matrix(ds)
    add_anchor("Correlation Matrix")
    html="<h2>Correlation Matrix</h2> <table><thead><th>-</th><th>"+ds.fields.join("</th><th>")+"</th> </thead> <tbody>"
    matrix=Statsample::Bivariate.correlation_matrix(ds)
    pmatrix=Statsample::Bivariate.correlation_probability_matrix(ds)
    (0...matrix.row_size).each do |row|
      html << "<tr><td>"+ds.fields[row]+"</td>"
      (0...matrix.column_size).each do |col|
        html << correlation_cell(matrix[row,col], pmatrix[row,col])
      end
      html << "</tr>"
    end
    html << "</tbody></table>"
    @partials.push(html)
  end

  # Add a scale.
  # ds::     dataset that holds the items
  # name::   name of the scale
  # fields:: fields of +ds+ that make up the scale (must not be empty)
  # icc::    when true, item characteristic curves are added as well
  #
  # Appends the item-analysis summary and then, one index level deeper, a
  # histogram, a run-sequence plot and a normal probability plot of the
  # vector returned by +vector_mean+ on the reduced dataset.
  def add_scale(ds,name, fields,icc=false)
    raise "Fields are empty" if fields.size==0
    add_anchor("Scale:#{name}")

    ds_partial=ds.dup(fields)
    ia=Statsample::Reliability::ItemAnalysis.new(ds_partial)
    html="<h2>Scale: #{name}</h2>"
    html << ia.html_summary
    @partials.push(html)
    @level+=1
    v=ds_partial.vector_mean
    add_histogram(name, v)
    add_runsequence_plot(name, v)
    add_normalprobability_plot(name,v)
    # add_icc takes (name, ds, fields); the dataset used to be left out,
    # which raised ArgumentError whenever icc was requested.
    add_icc(name,ds,fields) if icc
    @level-=1
  end

  # Appends a box plot of +vector+.
  def add_boxplot(name,vector,options={})
    add_graph("Box Plot #{name}", name, vector.svggraph_boxplot(options))
  end

  # Writes +graph+ to a fresh SVG file inside the report directory and
  # appends an <embed> fragment that points to it. +graph+ has to respond to
  # +width+, +height+ and +burn+. +id+ is accepted but not used.
  def add_graph(name,id,graph)
    add_anchor(name)
    rs_file=@dir+"/#{uniq_file()}.svg"
    html = "<h3>#{name}</h3> <p><embed src='#{rs_file}' width='#{graph.width}' height='#{graph.height}' type='image/svg+xml' /></p>\n"
    File.open(rs_file, "w") do |f|
      f.puts(graph.burn)
    end
    @partials.push(html)
  end

  # Appends a run-sequence plot of +vector+.
  def add_runsequence_plot(name, vector,options={})
    add_graph("Run-Sequence Plot #{name}", name, vector.svggraph_runsequence_plot(options))
  end

  # Appends a lag plot of +vector+.
  def add_lag_plot(name,vector, options={})
    add_graph("Lag Plot #{name}", name,vector.svggraph_lag_plot(options))
  end

  # Appends a normal probability plot of +vector+.
  def add_normalprobability_plot(name,vector,options={})
    add_graph("Normal Probability Plot #{name}", name, vector.svggraph_normalprobability_plot(options))
  end

  # Appends a scatterplot built from +ds+.
  # x_field::  defaults to the first field of +ds+
  # y_fields:: defaults to every other field of +ds+
  # config::   handed to Statsample::Graph::SvgScatterplot unchanged
  def add_scatterplot(name, ds,x_field=nil, y_fields=nil,config={})
    add_anchor("Scatterplot: #{name}")
    x_field||=ds.fields[0]
    y_fields||=ds.fields-[x_field]
    ds_partial=ds.dup([x_field]+y_fields)
    sc=Statsample::Graph::SvgScatterplot.new(ds_partial, config)
    sc.parse
    sc_file=@dir+"/#{uniq_file("sc")}.svg"
    html = "<h3>Scatterplot #{name}</h3> <p><embed src='#{sc_file}' width='#{sc.width}' height='#{sc.height}' type='image/svg+xml' /></p>\n"
    File.open(sc_file, "w") do |f|
      f.puts(sc.burn)
    end
    @partials.push(html)
  end

  # Appends one graph holding a box plot for every field of +ds+ and returns
  # the graph object. +options+ override the default title and height.
  def add_boxplots(name, ds,options={})
    add_anchor("Boxplots: #{name}")
    options={:graph_title=>"Boxplots:#{name}", :show_graph_title=>true, :height=>500}.merge! options
    graph = Statsample::Graph::SvgBoxplot.new(options)
    ds.fields.each do |f|
      graph.add_data(:title=>f, :data=>ds[f].valid_data, :vector=>ds[f])
    end
    add_graph(name,name,graph)
    graph
  end

  # Appends a histogram of +vector+ followed by its skewness and kurtosis.
  # bins:: number of bins; capped at 15. When omitted it is size/15, but
  #        never less than 1 (a vector with fewer than 15 cases would
  #        otherwise ask for a histogram with zero bins).
  def add_histogram(name,vector,bins=nil,options={})
    if bins.nil?
      bins=vector.size / 15
      bins=1 if bins<1
    end
    bins=15 if bins>15
    graph=vector.svggraph_histogram(bins,options)
    add_graph("Histogram:#{name}",name,graph)
    html = "<ul><li>Skewness=#{sprintf("%0.3f",vector.skew)}</li>
<li>Kurtosis=#{sprintf("%0.3f",vector.kurtosis)}</li></ul>"
    @partials.push(html)
  end

  # Appends the item characteristic curves of the scale made of +fields+.
  # The item analysis object writes the SVG files into the report directory;
  # this method only embeds "<dir>/<name>_<field>.svg" for every field.
  def add_icc(name,ds, fields)
    require 'statsample/graph/svggraph'
    raise "Fields are empty" if fields.size==0
    add_anchor("ICC:#{name}")
    ds_partial=ds.dup(fields)
    ia=Statsample::Reliability::ItemAnalysis.new(ds_partial)
    html="<h3>ICC for scale: #{name}</h3>"
    ia.svggraph_item_characteristic_curve(@dir ,name, {:width=>400,:height=>300})
    ds_partial.fields.sort.each do |f|
      html << "<div><p><strong>#{f}</strong></p><embed src='#{@dir}/#{name}_#{f}.svg' width='400' height='300' type='image/svg+xml' /></div>\n"
    end
    @partials.push(html)
  end

  # Stylesheet placed inside the <style> element of the report.
  def css
<<HERE
table {
border-collapse: collapse;
}
th {
text-align: left;
padding-right: 1em;
border-bottom: 3px solid #ccc;
}
th.active img {
display: inline;
}
tr.even, tr.odd {
background-color: #eee;
border-bottom: 1px solid #ccc;
}
tr.even, tr.odd {
padding: 0.1em 0.6em;
}
td.active {
background-color: #ddd;
}
table td {
border:1px solid #aaa;
}
table tr.line td{
border-top: 2px solid black;
}

HERE
  end

  # Returns the "<ul>" or "</ul>" tags needed to move the index from the
  # current nesting level (@c_level) to +level+; "" when they are equal.
  def create_uls(level)
    return "" if @c_level==level
    if level>@c_level
      "<ul>\n" * (level-@c_level)
    else
      "</ul>\n" * (@c_level-level)
    end
  end

  # Returns the whole report as one HTML string: header, the index (only when
  # at least one anchor exists) and every fragment wrapped in its own
  # <div class='section'>.
  def parse
    html="<html><head><title>#{@name}</title><style>#{css()}</style></head><body><h1>Report: #{@name}</h1>"
    if @anchors.size>0
      html << "<div class='index'>Index</div><ul>"
      @c_level=1
      @anchors.each do |name,level,uniq|
        html << create_uls(level)
        @c_level=level
        html << "<li><a href='#"+uniq.to_s+"'>#{name}</a></li>"
      end
      html << create_uls(1)
      # The index <div> is already closed right after its label, so only the
      # list is closed here (a trailing </div> left the markup unbalanced).
      html << "</ul>"
    end
    html << "<div class='section'>"+@partials.join("</div><div class='section'>")+"</div>"
    html << "</body></html>"
    html
  end

  # Writes the report returned by #parse to +filename+.
  def save(filename)
    File.open(filename,"w") do |fp|
      fp.write(parse)
    end
  end

  private

  # Builds one <td> of the correlation table from the coefficient +r+ and
  # its probability +prob+ (either may be nil).
  def correlation_cell(r, prob)
    return "<td>--</td>" if r.nil?
    sig=""
    prob_out=""
    unless prob.nil?
      prob_out=sprintf("%0.3f",prob)
      if prob<0.01
        sig="**"
      elsif prob<0.05
        sig="*"
      end
    end
    body="#{sprintf("%0.3f",r)} #{sig}<br /> #{prob_out}"
    sig=="" ? "<td>#{body}</td>" : "<td><strong>#{body}</strong></td>"
  end
end
|
232
|
+
end
|
@@ -0,0 +1,281 @@
|
|
1
|
+
module Statsample
|
2
|
+
# Multiset joins multiple datasets that share the same fields and vectors
|
3
|
+
# but with different number of cases.
|
4
|
+
# This is the base class for stratified and cluster sampling estimation
|
5
|
+
class Multiset
  # fields::   field names shared by every dataset
  # datasets:: hash of key => dataset
  attr_reader :fields, :datasets

  # To create a multiset
  # * Multiset.new(%w{f1 f2 f3}) # define only fields
  def initialize(fields)
    @fields=fields
    @datasets={}
  end

  # Builds a multiset that holds one new, empty Dataset (created with
  # +fields+) for every name in +ds_names+.
  def self.new_empty_vectors(fields,ds_names)
    ms=new(fields)
    ds_names.each do |d|
      ms.add_dataset(d,Dataset.new(fields))
    end
    ms
  end

  # Sorted keys of the stored datasets.
  def datasets_names
    @datasets.keys.sort
  end

  # Number of stored datasets.
  def n_datasets
    @datasets.size
  end

  # Stores +ds+ under +key+.
  # Raises ArgumentError when the fields of +ds+ differ from the fields of
  # the multiset.
  def add_dataset(key,ds)
    if ds.fields!=@fields
      raise ArgumentError, "Dataset (#{ds.fields.inspect}) must have the same fields as the Multiset (#{@fields.inspect})"
    end
    @datasets[key]=ds
  end

  # Yields (key, vector of +field+) for every dataset and returns the sum of
  # the block results, starting from 0.
  def sum_field(field)
    @datasets.inject(0) do |acc,(stratum_name,ds)|
      acc + yield(stratum_name, ds[field])
    end
  end

  # Yields (key, vector of +field+) for every dataset and returns the block
  # results as an array.
  def collect_vector(field)
    @datasets.collect do |key,ds|
      yield key, ds[field]
    end
  end

  # Dataset stored under key +i+ (nil when there is none).
  def [](i)
    @datasets[i]
  end
end
|
50
|
+
# Estimators for stratified samples.
#
# The class-level methods receive +es+, an array with one hash per stratum.
# As built by the instance methods below, the keys are:
# 'N':: size of the stratum in the population
# 'n':: number of sampled cases in the stratum
# 's':: standard deviation of the stratum sample (mean estimators)
# 'p':: proportion observed in the stratum sample (proportion estimators)
#
# Method suffixes: "wr"/"wor" = with / without replacement, "esd" =
# estimated standard deviation. NOTE(review): "ksd" presumably stands for
# "known standard deviation" -- confirm.
class StratifiedSample
  class << self
    # Pooled mean of several vectors: sum of all values divided by the total
    # number of cases.
    def mean(*v)
      n_total=0
      total=v.inject(0) do |acc,vector|
        n_total+=vector.size
        acc+vector.sum
      end
      total.to_f/n_total
    end

    # (1/N) * sqrt( sum( N_h^2 * s_h^2 / n_h ) )
    def standard_error_ksd_wr(es)
      n_total=population_total(es)
      sum=es.inject(0) do |acc,h|
        acc+((h['N']**2 * h['s']**2) / h['n'].to_f)
      end
      (1.to_f / n_total)*Math::sqrt(sum)
    end

    # Square of #standard_error_ksd_wr.
    def variance_ksd_wr(es)
      standard_error_ksd_wr(es)**2
    end

    # Source : Cochran (1972)
    #
    # sum( (N_h/N)^2 * (s_h^2 / n_h) * (1 - n_h/N_h) )
    def variance_ksd_wor(es)
      n_total=population_total(es)
      es.inject(0) do |acc,h|
        weight=(h['N'].to_f / n_total)**2
        fpc=1 - (h['n'].to_f / h['N'])
        acc + weight * (h['s']**2 / h['n'].to_f) * fpc
      end
    end

    # Square root of #variance_ksd_wor.
    def standard_error_ksd_wor(es)
      Math::sqrt(variance_ksd_wor(es))
    end

    # (1/N^2) * sum( N_h * (N_h - n_h) * s_h^2 / n_h )
    def variance_esd_wor(es)
      n_total=population_total(es)
      sum=es.inject(0) do |acc,h|
        acc + h['N']*(h['N']-h['n'])*(h['s']**2 / h['n'].to_f)
      end
      (1.0/(n_total**2))*sum
    end

    # Square root of #variance_esd_wor. (This used to call variance_ksd_wor;
    # both formulas are algebraically the same, so only the pairing of the
    # names changes.)
    def standard_error_esd_wor(es)
      Math::sqrt(variance_esd_wor(es))
    end

    # Based on http://stattrek.com/Lesson6/STRAnalysis.aspx
    #
    # (1/N^2) * sum( s_h^2 * N_h^2 / n_h )
    def variance_esd_wr(es)
      n_total=population_total(es)
      sum=es.inject(0) do |acc,h|
        acc + ((h['s']**2 * h['N']**2) / h['n'].to_f)
      end
      (1.0/(n_total**2))*sum
    end

    # Square root of #variance_esd_wr.
    def standard_error_esd_wr(es)
      Math::sqrt(variance_esd_wr(es))
    end

    # sum( (N_h/N)^2 * p_h*(1-p_h) / n_h * (1 - n_h/N_h) )
    def proportion_variance_ksd_wor(es)
      n_total=population_total(es)
      es.inject(0) do |acc,h|
        acc + (((h['N'].to_f / n_total)**2 * h['p']*(1-h['p'])) / (h['n'])) * (1- (h['n'].to_f / h['N']))
      end
    end

    # Square root of #proportion_variance_ksd_wor.
    def proportion_sd_ksd_wor(es)
      Math::sqrt(proportion_variance_ksd_wor(es))
    end

    # (1/N) * sqrt( sum( N_h^2 * p_h*(1-p_h) / n_h ) )
    def proportion_sd_ksd_wr(es)
      n_total=population_total(es)
      sum=es.inject(0) do |acc,h|
        acc + (h['N']**2 * h['p']*(1-h['p'])) / h['n'].to_f
      end
      Math::sqrt(sum) * (1.0/n_total)
    end

    # Square of #proportion_sd_ksd_wr. (It used to square the
    # without-replacement variance, which is neither a variance nor related
    # to sampling with replacement.)
    def proportion_variance_ksd_wr(es)
      proportion_sd_ksd_wr(es)**2
    end

    # (1/N^2) * sum( N_h^2 * (N_h - n_h) * p_h*(1-p_h) / ((n_h-1)*(N_h-1)) )
    #
    # Same expression as the instance method #variance_pst. The previous
    # body raised NameError on every non-empty +es+ (it assigned the term to
    # the accumulator and then added an undefined +val+) and applied a
    # square root to what is meant to be a variance.
    # A stratum with n_h == 1 or N_h == 1 makes the denominator zero.
    def proportion_variance_esd_wor(es)
      n_total=population_total(es)
      sum=es.inject(0) do |acc,h|
        acc + (h['N']**2 * (h['N']-h['n']) * h['p']*(1.0-h['p'])) / ((h['n']-1)*(h['N']-1))
      end
      sum / (n_total**2).to_f
    end

    # NOTE(review): despite the "esd" name this takes the square root of
    # proportion_variance_ksd_wor. It is kept as is so existing results do
    # not change -- confirm whether proportion_variance_esd_wor was intended.
    def proportion_sd_esd_wor(es)
      Math::sqrt(proportion_variance_ksd_wor(es))
    end

    private

    # Sum of the stratum population sizes ('N') of +es+.
    def population_total(es)
      es.inject(0) {|acc,h| acc+h['N'] }
    end
  end

  # Number of strata
  attr_reader :strata_number
  # Population size. Equal to the sum of the strata sizes
  attr_reader :population_size
  # Sample size. Equal to the sum of the cases of every stratum sample
  attr_reader :sample_size

  # ms::           Statsample::Multiset with one dataset per stratum
  # strata_sizes:: hash of dataset name => size of that stratum in the
  #                population; its keys must match the dataset names
  #
  # Raises TypeError when +ms+ is not a Multiset and ArgumentError when the
  # keys of +strata_sizes+ do not match the dataset names.
  def initialize(ms,strata_sizes)
    raise TypeError,"ms should be a Multiset" unless ms.is_a? Statsample::Multiset
    @ms=ms
    raise ArgumentError,"You should put a strata size for each dataset" if strata_sizes.keys.sort!=ms.datasets_names
    @strata_sizes=strata_sizes
    @population_size=@strata_sizes.inject(0) {|acc,(_name,size)| acc+size }
    @strata_number=@ms.n_datasets
    @sample_size=@ms.datasets.inject(0) {|acc,(_name,ds)| acc+ds.cases }
  end

  # Size of stratum +h+ in the population
  def stratum_size(h)
    @strata_sizes[h]
  end

  # Array with the vector of +field+ of every stratum.
  def vectors_by_field(field)
    @ms.datasets.collect {|_name,ds| ds[field] }
  end

  # Population proportion based on strata: the proportion of value +v+ in
  # every stratum, weighted by the stratum ponderation.
  def proportion(field, v=1)
    @ms.sum_field(field) do |s_name,vector|
      stratum_ponderation(s_name)*vector.proportion(v)
    end
  end

  # Stratum ponderation: stratum size divided by population size.
  # Symbol: W\<sub>h\</sub>
  def stratum_ponderation(h)
    @strata_sizes[h].to_f / @population_size
  end
  alias_method :wh, :stratum_ponderation

  # Population mean based on strata: the mean of every stratum, weighted by
  # the stratum ponderation.
  def mean(field)
    @ms.sum_field(field) do |s_name,vector|
      stratum_ponderation(s_name)*vector.mean
    end
  end

  # Standard error with estimated population variance and without replacement.
  # Source: Cochran (1972)
  def standard_error_wor(field)
    es=@ms.collect_vector(field) do |s_n, vector|
      {'N'=>@strata_sizes[s_n],'n'=>vector.size, 's'=>vector.sds}
    end
    StratifiedSample.standard_error_esd_wor(es)
  end

  # Standard error with estimated population variance and without replacement.
  # Source: http://stattrek.com/Lesson6/STRAnalysis.aspx
  def standard_error_wor_2(field)
    sum=@ms.sum_field(field) do |s_name,vector|
      s_size=@strata_sizes[s_name]
      (s_size**2 * (1-(vector.size.to_f / s_size)) * vector.variance_sample / vector.size.to_f)
    end
    (1/@population_size.to_f)*Math::sqrt(sum)
  end

  # Standard error computed with StratifiedSample.standard_error_esd_wr.
  def standard_error_wr(field)
    es=@ms.collect_vector(field) do |s_n, vector|
      {'N'=>@strata_sizes[s_n],'n'=>vector.size, 's'=>vector.sds}
    end
    StratifiedSample.standard_error_esd_wr(es)
  end

  # Standard deviation of the proportion of value +v+, computed with
  # StratifiedSample.proportion_sd_esd_wor.
  def proportion_sd_esd_wor(field,v=1)
    es=@ms.collect_vector(field) do |s_n, vector|
      {'N'=>@strata_sizes[s_n],'n'=>vector.size, 'p'=>vector.proportion(v)}
    end
    StratifiedSample.proportion_sd_esd_wor(es)
  end

  # (1/N) * sqrt( sum( N_h^2 * (1 - n_h/N_h) * p*(1-p) / (n_h - 1) ) )
  # where p is the overall stratified proportion of value +v+.
  def proportion_standard_error(field,v=1)
    prop=proportion(field,v)
    sum=@ms.sum_field(field) do |s_name,vector|
      nh=vector.size
      s_size=@strata_sizes[s_name]
      # to_f keeps the sampling fraction from collapsing to 0 under integer
      # division, which silently dropped the finite-population correction.
      (s_size**2 * (1-(nh.to_f/s_size)) * prop * (1-prop) / (nh -1 ))
    end
    (1/@population_size.to_f) * Math::sqrt(sum)
  end

  # Cochran(1971), p. 150
  #
  # (1/N^2) * sum( N_h^2 * (N_h - n_h) / (N_h - 1) * p_h*(1-p_h) / (n_h - 1) )
  def variance_pst(field,v=1)
    sum=@ms.datasets.inject(0) do |acc,(stratum_name,ds)|
      nh=ds.cases.to_f
      s_size=@strata_sizes[stratum_name]
      prop=ds[field].proportion(v)
      acc + (((s_size**2 * (s_size-nh)) / (s_size-1))*(prop*(1-prop) / (nh-1)))
    end
    (1/@population_size.to_f ** 2)*sum
  end
end
|
281
|
+
end
|