statsample 0.6.3 → 0.6.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -37,12 +37,20 @@ module Statsample
37
37
  @ds=ds
38
38
  @y_var=y_var
39
39
  @n=ds.cases
40
- @fields=ds.fields-[y_var]
41
- @samples_ga=@fields.inject({}){|a,v| a[v]=[];a}
40
+
42
41
  @n_samples=0
43
42
  @alpha=ALPHA
44
43
  @debug=false
45
- @regression_class=Regression::Multiple::RubyEngine
44
+ if y_var.is_a? Array
45
+ @fields=ds.fields-y_var
46
+ @regression_class=Regression::Multiple::MultipleDependent
47
+
48
+ else
49
+ @fields=ds.fields-[y_var]
50
+ @regression_class=Regression::Multiple::MatrixEngine
51
+ end
52
+ @samples_ga=@fields.inject({}){|a,v| a[v]=[];a}
53
+
46
54
  @name=_("Bootstrap dominance Analysis: %s over %s") % [ ds.fields.join(",") , @y_var]
47
55
  opts.each{|k,v|
48
56
  self.send("#{k}=",v) if self.respond_to? k
@@ -52,10 +60,10 @@ module Statsample
52
60
  # lr_class deprecated
53
61
  alias_method :lr_class, :regression_class
54
62
  def da
55
- if @da.nil?
56
- @da=DominanceAnalysis.new(@ds,@y_var, :regression_class => @regression_class)
57
- end
58
- @da
63
+ if @da.nil?
64
+ @da=DominanceAnalysis.new(@ds,@y_var, :regression_class => @regression_class)
65
+ end
66
+ @da
59
67
  end
60
68
  # Creates n re-samples from original dataset and store result of
61
69
  # each sample on @samples_td, @samples_cd, @samples_gd, @samples_ga
@@ -69,6 +77,7 @@ module Statsample
69
77
  puts _("Bootstrap %d of %d") % [t+1, number_samples] if @debug
70
78
  ds_boot=@ds.bootstrap(n)
71
79
  da_1=DominanceAnalysis.new(ds_boot, @y_var, :regression_class => @regression_class)
80
+
72
81
  da_1.total_dominance.each{|k,v|
73
82
  @samples_td[k].push(v)
74
83
  }
@@ -21,22 +21,62 @@ class ::Matrix
21
21
  }
22
22
  GSL::Matrix[*out]
23
23
  end
24
-
24
+
25
25
  # Calculate marginal of rows
26
- def rows_sum
26
+ def row_sum
27
27
  (0...row_size).collect {|i|
28
28
  row(i).to_a.inject(0) {|a,v| a+v}
29
29
  }
30
30
  end
31
31
  # Calculate marginal of columns
32
- def cols_sum
32
+ def column_sum
33
33
  (0...column_size).collect {|i|
34
34
  column(i).to_a.inject(0) {|a,v| a+v}
35
35
  }
36
36
  end
37
+
38
+
39
+ alias :old_par :[]
40
+
41
+ # Select elements and submatrixes
42
+ # Implement row, column and minor in one method
43
+ #
44
+ # * [i,j]:: Element i,j
45
+ # * [i,:*]:: Row i
46
+ # * [:*,j]:: Column j
47
+ # * [i1..i2,j]:: Row i1 to i2, column j
48
+
49
+ def [](*args)
50
+ raise ArgumentError if args.size!=2
51
+ x=args[0]
52
+ y=args[1]
53
+ if x.is_a? Integer and y.is_a? Integer
54
+ @rows[args[0]][args[1]]
55
+ else
56
+ # set ranges according to arguments
57
+
58
+ rx=case x
59
+ when Numeric
60
+ x..x
61
+ when :*
62
+ 0..(row_size-1)
63
+ when Range
64
+ x
65
+ end
66
+ ry=case y
67
+ when Numeric
68
+ y..y
69
+ when :*
70
+ 0..(column_size-1)
71
+ when Range
72
+ y
73
+ end
74
+ Matrix.rows(rx.collect {|i| ry.collect {|j| @rows[i][j]}})
75
+ end
76
+ end
37
77
  # Calculate sum of cells
38
78
  def total_sum
39
- rows_sum.inject(0){|a,v| a+v}
79
+ row_sum.inject(0){|a,v| a+v}
40
80
  end
41
81
  end
42
82
 
@@ -52,26 +92,118 @@ module GSL
52
92
  end
53
93
 
54
94
  module Statsample
55
- attr :labels
56
- attr :name
57
- module CorrelationMatrix
95
+ # Method for variance/covariance and correlation matrices
96
+ module CovariateMatrix
58
97
  def summary
59
98
  rp=ReportBuilder.new()
60
99
  rp.add(self)
61
100
  rp.to_text
62
101
  end
63
- def labels=(v)
64
- @labels=v
102
+ def type=(v)
103
+ @type=v
104
+ end
105
+ def type
106
+ if row_size.times.find {|i| self[i,i]!=1.0}
107
+ :covariance
108
+ else
109
+ :correlation
110
+ end
111
+
112
+ end
113
+ def correlation
114
+ if(type==:covariance)
115
+ matrix=Matrix.rows(row_size.times.collect { |i|
116
+ column_size.times.collect { |j|
117
+ if i==j
118
+ 1.0
119
+ else
120
+ self[i,j].quo(Math::sqrt(self[i,i])*Math::sqrt(self[j,j]))
121
+ end
122
+ }
123
+ })
124
+ matrix.extend CovariateMatrix
125
+ matrix.fields_x=fields_x
126
+ matrix.fields_y=fields_y
127
+ matrix.type=:correlation
128
+ matrix
129
+ else
130
+ self
131
+ end
132
+ end
133
+ def fields
134
+ raise "Should be square" if !square?
135
+ @fields_x
136
+ end
137
+ def fields=(v)
138
+ raise "Matrix should be square" if !square?
139
+ @fields_x=v
140
+ @fields_y=v
141
+ end
142
+ def fields_x=(v)
143
+ raise "Size of fields != row_size" if v.size!=row_size
144
+ @fields_x=v
145
+ end
146
+ def fields_y=(v)
147
+ raise "Size of fields != column_size" if v.size!=column_size
148
+ @fields_y=v
149
+ end
150
+ def fields_x
151
+ if @fields_x.nil?
152
+ @fields_x=row_size.times.collect {|i| i}
153
+ end
154
+ @fields_x
155
+ end
156
+ def fields_y
157
+ if @fields_y.nil?
158
+ @fields_y=column_size.times.collect {|i| i}
159
+ end
160
+ @fields_y
65
161
  end
162
+
66
163
  def name=(v)
67
164
  @name=v
68
165
  end
166
+ def name
167
+ @name
168
+ end
169
+ # Select a submatrix of factors. You could use labels or index to select
170
+ # the factors.
171
+ # If you don't specify columns, will be equal to rows
172
+ # Example:
173
+ # a=Matrix[[1.0, 0.3, 0.2], [0.3, 1.0, 0.5], [0.2, 0.5, 1.0]]
174
+ # a.extends CovariateMatrix
175
+ # a.labels=%w{a b c}
176
+ # a.submatrix(%{c a}, %w{b})
177
+ # => Matrix[[0.5],[0.3]]
178
+ # a.submatrix(%{c a})
179
+ # => Matrix[[1.0, 0.2] , [0.2, 1.0]]
180
+ def submatrix(rows,columns=nil)
181
+ columns||=rows
182
+ # Convert all labels on index
183
+ row_index=rows.collect {|v|
184
+ v.is_a?(Numeric) ? v : fields_x.index(v)
185
+ }
186
+ column_index=columns.collect {|v|
187
+ v.is_a?(Numeric) ? v : fields_y.index(v)
188
+ }
189
+
190
+
191
+ fx=row_index.collect {|v| fields_x[v]}
192
+ fy=column_index.collect {|v| fields_y[v]}
193
+
194
+ matrix= Matrix.rows(row_index.collect {|i|
195
+ row=column_index.collect {|j| self[i,j]}})
196
+ matrix.extend CovariateMatrix
197
+ matrix.fields_x=fx
198
+ matrix.fields_y=fy
199
+ matrix.type=type
200
+ matrix
201
+ end
69
202
  def to_reportbuilder(generator)
70
- @name||="Correlation Matrix"
71
- @labels||=row_size.times.collect {|i| i.to_s}
72
- t=ReportBuilder::Table.new(:name=>@name, :header=>[""]+@labels)
203
+ @name||= (type==:correlation ? "Correlation":"Covariance")+" Matrix"
204
+ t=ReportBuilder::Table.new(:name=>@name, :header=>[""]+fields_y)
73
205
  row_size.times {|i|
74
- t.add_row([@labels[i]]+@rows[i].collect {|i| sprintf("%0.3f",i).gsub("0.",".")})
206
+ t.add_row([fields_x[i]]+@rows[i].collect {|i1| sprintf("%0.3f",i1).gsub("0.",".")})
75
207
  }
76
208
  generator.parse_element(t)
77
209
  end
@@ -1,54 +1,54 @@
1
1
  module Statsample
2
- # Multiset joins multiple dataset with the same fields and vectors
3
- # but with different number of cases.
4
- # This is the base class for stratified and cluster sampling estimation
5
- class Multiset
6
- attr_reader :fields, :datasets
7
- # To create a multiset
8
- # * Multiset.new(%w{f1 f2 f3}) # define only fields
9
- def initialize(fields)
10
- @fields=fields
11
- @datasets={}
12
- end
13
- def self.new_empty_vectors(fields,ds_names)
14
- ms=Multiset.new(fields)
15
- ds_names.each{|d|
16
- ms.add_dataset(d,Dataset.new(fields))
17
- }
18
- ms
19
- end
20
- def datasets_names
21
- @datasets.keys.sort
22
- end
23
- def n_datasets
24
- @datasets.size
25
- end
26
- def add_dataset(key,ds)
27
- if(ds.fields!=@fields)
28
- raise ArgumentError, "Dataset(#{ds.fields.to_s})must have the same fields of the Multiset(#{@fields})"
29
- else
30
- @datasets[key]=ds
31
- end
32
- end
33
- def sum_field(field)
34
- @datasets.inject(0) {|a,da|
35
- stratum_name=da[0]
36
- vector=da[1][field]
37
- val=yield stratum_name,vector
38
- a+val
39
- }
40
- end
41
- def collect_vector(field)
42
- @datasets.collect {|k,v|
43
- yield k, v[field]
44
- }
45
- end
46
- def[](i)
47
- @datasets[i]
2
+ # Multiset joins multiple dataset with the same fields and vectors
3
+ # but with different number of cases.
4
+ # This is the base class for stratified and cluster sampling estimation
5
+ class Multiset
6
+ attr_reader :fields, :datasets
7
+ # To create a multiset
8
+ # * Multiset.new(%w{f1 f2 f3}) # define only fields
9
+ def initialize(fields)
10
+ @fields=fields
11
+ @datasets={}
12
+ end
13
+ def self.new_empty_vectors(fields,ds_names)
14
+ ms=Multiset.new(fields)
15
+ ds_names.each{|d|
16
+ ms.add_dataset(d,Dataset.new(fields))
17
+ }
18
+ ms
19
+ end
20
+ def datasets_names
21
+ @datasets.keys.sort
22
+ end
23
+ def n_datasets
24
+ @datasets.size
25
+ end
26
+ def add_dataset(key,ds)
27
+ if(ds.fields!=@fields)
28
+ raise ArgumentError, "Dataset(#{ds.fields.to_s})must have the same fields of the Multiset(#{@fields})"
29
+ else
30
+ @datasets[key]=ds
48
31
  end
49
32
  end
33
+ def sum_field(field)
34
+ @datasets.inject(0) {|a,da|
35
+ stratum_name=da[0]
36
+ vector=da[1][field]
37
+ val=yield stratum_name,vector
38
+ a+val
39
+ }
40
+ end
41
+ def collect_vector(field)
42
+ @datasets.collect {|k,v|
43
+ yield k, v[field]
44
+ }
45
+ end
46
+ def[](i)
47
+ @datasets[i]
48
+ end
49
+ end
50
50
  class StratifiedSample
51
- class << self
51
+ class << self
52
52
  # mean for an array of vectors
53
53
  def mean(*vectors)
54
54
  n_total=0
@@ -59,223 +59,206 @@ module Statsample
59
59
  means.to_f/n_total
60
60
  end
61
61
 
62
- def standard_error_ksd_wr(es)
63
- n_total=0
64
- sum=es.inject(0){|a,h|
65
- n_total+=h['N']
66
- a+((h['N']**2 * h['s']**2) / h['n'].to_f)
67
- }
68
- (1.to_f / n_total)*Math::sqrt(sum)
69
- end
70
-
71
-
72
- def variance_ksd_wr(es)
73
- standard_error_ksd_wr(es)**2
74
- end
75
-
76
- # Source : Cochran (1972)
77
-
78
- def variance_ksd_wor(es)
79
- n_total=es.inject(0) {|a,h|
80
- a+h['N']
81
- }
82
- es.inject(0){|a,h|
83
- val=((h['N'].to_f / n_total)**2) * (h['s']**2 / h['n'].to_f) * (1 - (h['n'].to_f / h['N']))
84
- a+val
85
- }
86
- end
87
- def standard_error_ksd_wor(es)
88
- Math::sqrt(variance_ksd_wor(es))
89
- end
90
-
91
-
92
-
93
- def variance_esd_wor(es)
94
- n_total=es.inject(0) {|a,h|
95
- a+h['N']
96
- }
97
-
98
- sum=es.inject(0){|a,h|
99
- val=h['N']*(h['N']-h['n'])*(h['s']**2 / h['n'].to_f)
100
- a+val
101
- }
102
- (1.0/(n_total**2))*sum
103
- end
104
-
105
-
106
- def standard_error_esd_wor(es)
107
- Math::sqrt(variance_ksd_wor(es))
108
- end
109
- # Based on http://stattrek.com/Lesson6/STRAnalysis.aspx
110
- def variance_esd_wr(es)
111
- n_total=es.inject(0) {|a,h|
112
- a+h['N']
113
- }
114
-
115
- sum=es.inject(0){|a,h|
116
- val= ((h['s']**2 * h['N']**2) / h['n'].to_f)
117
- a+val
118
- }
119
- (1.0/(n_total**2))*sum
120
- end
121
- def standard_error_esd_wr(es)
122
- Math::sqrt(variance_esd_wr(es))
123
- end
124
-
125
- def proportion_variance_ksd_wor(es)
126
- n_total=es.inject(0) {|a,h|
127
- a+h['N']
128
- }
129
-
130
- es.inject(0){|a,h|
131
- val= (((h['N'].to_f / n_total)**2 * h['p']*(1-h['p'])) / (h['n'])) * (1- (h['n'].to_f / h['N']))
132
- a+val
133
- }
134
- end
135
- def proportion_sd_ksd_wor(es)
136
- Math::sqrt(proportion_variance_ksd_wor(es))
137
- end
138
-
139
-
140
- def proportion_sd_ksd_wr(es)
141
- n_total=es.inject(0) {|a,h|
142
- a+h['N']
143
- }
144
-
145
- sum=es.inject(0){|a,h|
146
- val= (h['N']**2 * h['p']*(1-h['p'])) / h['n'].to_f
147
- a+val
148
- }
149
- Math::sqrt(sum) * (1.0/n_total)
150
- end
151
- def proportion_variance_ksd_wr(es)
152
- proportion_variance_ksd_wor(es)**2
153
- end
154
-
155
- def proportion_variance_esd_wor(es)
156
- n_total=es.inject(0) {|a,h|
157
- a+h['N']
158
- }
159
-
160
- sum=es.inject(0){|a,h|
161
- a=(h['N']**2 * (h['N']-h['n']) * h['p']*(1.0-h['p'])) / ((h['n']-1)*(h['N']-1))
162
- a+val
163
- }
164
- Math::sqrt(sum) * (1.0/n_total**2)
165
- end
166
- def proportion_sd_esd_wor(es)
167
- Math::sqrt(proportion_variance_ksd_wor(es))
168
- end
169
-
170
-
171
-
62
+ def standard_error_ksd_wr(es)
63
+ n_total=0
64
+ sum=es.inject(0){|a,h|
65
+ n_total+=h['N']
66
+ a+((h['N']**2 * h['s']**2) / h['n'].to_f)
67
+ }
68
+ (1.to_f / n_total)*Math::sqrt(sum)
69
+ end
70
+
71
+
72
+ def variance_ksd_wr(es)
73
+ standard_error_ksd_wr(es)**2
74
+ end
75
+ def calculate_n_total(es)
76
+ es.inject(0) {|a,h| a+h['N'] }
77
+ end
78
+ # Source : Cochran (1972)
79
+
80
+ def variance_ksd_wor(es)
81
+ n_total=calculate_n_total(es)
82
+ es.inject(0){|a,h|
83
+ val=((h['N'].to_f / n_total)**2) * (h['s']**2 / h['n'].to_f) * (1 - (h['n'].to_f / h['N']))
84
+ a+val
85
+ }
86
+ end
87
+ def standard_error_ksd_wor(es)
88
+ Math::sqrt(variance_ksd_wor(es))
89
+ end
90
+
91
+
92
+
93
+ def variance_esd_wor(es)
94
+ n_total=calculate_n_total(es)
95
+ sum=es.inject(0){|a,h|
96
+ val=h['N']*(h['N']-h['n'])*(h['s']**2 / h['n'].to_f)
97
+ a+val
98
+ }
99
+ (1.0/(n_total**2))*sum
100
+ end
101
+
102
+
103
+ def standard_error_esd_wor(es)
104
+ Math::sqrt(variance_ksd_wor(es))
105
+ end
106
+ # Based on http://stattrek.com/Lesson6/STRAnalysis.aspx
107
+ def variance_esd_wr(es)
108
+ n_total=calculate_n_total(es)
109
+ sum=es.inject(0){|a,h|
110
+ val= ((h['s']**2 * h['N']**2) / h['n'].to_f)
111
+ a+val
112
+ }
113
+ (1.0/(n_total**2))*sum
114
+ end
115
+ def standard_error_esd_wr(es)
116
+ Math::sqrt(variance_esd_wr(es))
117
+ end
118
+
119
+ def proportion_variance_ksd_wor(es)
120
+ n_total=calculate_n_total(es)
121
+ es.inject(0){|a,h|
122
+ val= (((h['N'].to_f / n_total)**2 * h['p']*(1-h['p'])) / (h['n'])) * (1- (h['n'].to_f / h['N']))
123
+ a+val
124
+ }
125
+ end
126
+ def proportion_sd_ksd_wor(es)
127
+ Math::sqrt(proportion_variance_ksd_wor(es))
128
+ end
129
+
130
+
131
+ def proportion_sd_ksd_wr(es)
132
+ n_total=calculate_n_total(es)
133
+ sum=es.inject(0){|a,h|
134
+ val= (h['N']**2 * h['p']*(1-h['p'])) / h['n'].to_f
135
+ a+val
136
+ }
137
+ Math::sqrt(sum) * (1.0/n_total)
138
+ end
139
+ def proportion_variance_ksd_wr(es)
140
+ proportion_variance_ksd_wor(es)**2
141
+ end
142
+
143
+ def proportion_variance_esd_wor(es)
144
+ n_total=n_total=calculate_n_total(es)
145
+
146
+ sum=es.inject(0){|a,h|
147
+ a=(h['N']**2 * (h['N']-h['n']) * h['p']*(1.0-h['p'])) / ((h['n']-1)*(h['N']-1))
148
+ a+val
149
+ }
150
+ Math::sqrt(sum) * (1.0/n_total**2)
151
+ end
152
+ def proportion_sd_esd_wor(es)
153
+ Math::sqrt(proportion_variance_ksd_wor(es))
154
+ end
172
155
  end
173
- def initialize(ms,strata_sizes)
174
- raise TypeError,"ms should be a Multiset" unless ms.is_a? Statsample::Multiset
175
- @ms=ms
176
- raise ArgumentError,"You should put a strata size for each dataset" if strata_sizes.keys.sort!=ms.datasets_names
177
- @strata_sizes=strata_sizes
178
- @population_size=@strata_sizes.inject(0) {|a,x| a+x[1]}
179
- @strata_number=@ms.n_datasets
180
- @sample_size=@ms.datasets.inject(0) {|a,x| a+x[1].cases}
181
- end
182
- # Number of strata
183
- def strata_number
184
- @strata_number
185
- end
186
- # Population size. Equal to sum of strata sizes
187
- # Symbol: N<sub>h</sub>
188
- def population_size
189
- @population_size
190
- end
191
- # Sample size. Equal to sum of sample of each stratum
192
- def sample_size
193
- @sample_size
194
- end
195
- # Size of stratum x
196
- def stratum_size(h)
197
- @strata_sizes[h]
198
- end
199
- def vectors_by_field(field)
200
- @ms.datasets.collect{|k,ds|
201
- ds[field]
202
- }
203
- end
204
- # Population proportion based on strata
205
- def proportion(field, v=1)
206
- @ms.sum_field(field) {|s_name,vector|
207
- stratum_ponderation(s_name)*vector.proportion(v)
208
- }
209
- end
210
- # Stratum ponderation.
211
- # Symbol: W\<sub>h\</sub>
212
- def stratum_ponderation(h)
213
- @strata_sizes[h].to_f / @population_size
214
- end
215
- alias_method :wh, :stratum_ponderation
216
-
217
- # Population mean based on strata
218
- def mean(field)
219
- @ms.sum_field(field) {|s_name,vector|
220
- stratum_ponderation(s_name)*vector.mean
221
- }
222
- end
223
- # Standard error with estimated population variance and without replacement.
224
- # Source: Cochran (1972)
225
- def standard_error_wor(field)
226
- es=@ms.collect_vector(field) {|s_n, vector|
227
- {'N'=>@strata_sizes[s_n],'n'=>vector.size, 's'=>vector.sds}
228
- }
229
-
230
- StratifiedSample.standard_error_esd_wor(es)
231
- end
232
-
233
- # Standard error with estimated population variance and without replacement.
234
- # Source: http://stattrek.com/Lesson6/STRAnalysis.aspx
235
-
236
- def standard_error_wor_2(field)
237
- sum=@ms.sum_field(field) {|s_name,vector|
238
- s_size=@strata_sizes[s_name]
239
- (s_size**2 * (1-(vector.size.to_f / s_size)) * vector.variance_sample / vector.size.to_f)
240
- }
241
- (1/@population_size.to_f)*Math::sqrt(sum)
242
- end
243
-
244
- def standard_error_wr(field)
245
- es=@ms.collect_vector(field) {|s_n, vector|
246
- {'N'=>@strata_sizes[s_n],'n'=>vector.size, 's'=>vector.sds}
247
- }
248
-
249
- StratifiedSample.standard_error_esd_wr(es)
250
- end
251
- def proportion_sd_esd_wor(field,v=1)
252
- es=@ms.collect_vector(field) {|s_n, vector|
253
- {'N'=>@strata_sizes[s_n],'n'=>vector.size, 'p'=>vector.proportion(v)}
254
- }
255
-
256
- StratifiedSample.proportion_sd_esd_wor(es)
257
- end
258
-
259
- def proportion_standard_error(field,v=1)
260
- prop=proportion(field,v)
261
- sum=@ms.sum_field(field) {|s_name,vector|
262
- nh=vector.size
263
- s_size=@strata_sizes[s_name]
264
- (s_size**2 * (1-(nh/s_size)) * prop * (1-prop) / (nh -1 ))
265
- }
266
- (1.quo(@population_size)) * Math::sqrt(sum)
267
- end
268
- # Cochran(1971), p. 150
269
- def variance_pst(field,v=1)
270
- sum=@ms.datasets.inject(0) {|a,da|
271
- stratum_name=da[0]
272
- ds=da[1]
273
- nh=ds.cases.to_f
274
- s_size=@strata_sizes[stratum_name]
275
- prop=ds[field].proportion(v)
276
- a + (((s_size**2 * (s_size-nh)) / (s_size-1))*(prop*(1-prop) / (nh-1)))
277
- }
278
- (1/@population_size.to_f ** 2)*sum
279
- end
280
- end
156
+ def initialize(ms,strata_sizes)
157
+ raise TypeError,"ms should be a Multiset" unless ms.is_a? Statsample::Multiset
158
+ @ms=ms
159
+ raise ArgumentError,"You should put a strata size for each dataset" if strata_sizes.keys.sort!=ms.datasets_names
160
+ @strata_sizes=strata_sizes
161
+ @population_size=@strata_sizes.inject(0) {|a,x| a+x[1]}
162
+ @strata_number=@ms.n_datasets
163
+ @sample_size=@ms.datasets.inject(0) {|a,x| a+x[1].cases}
164
+ end
165
+ # Number of strata
166
+ def strata_number
167
+ @strata_number
168
+ end
169
+ # Population size. Equal to sum of strata sizes
170
+ # Symbol: N<sub>h</sub>
171
+ def population_size
172
+ @population_size
173
+ end
174
+ # Sample size. Equal to sum of sample of each stratum
175
+ def sample_size
176
+ @sample_size
177
+ end
178
+ # Size of stratum x
179
+ def stratum_size(h)
180
+ @strata_sizes[h]
181
+ end
182
+ def vectors_by_field(field)
183
+ @ms.datasets.collect{|k,ds|
184
+ ds[field]
185
+ }
186
+ end
187
+ # Population proportion based on strata
188
+ def proportion(field, v=1)
189
+ @ms.sum_field(field) {|s_name,vector|
190
+ stratum_ponderation(s_name)*vector.proportion(v)
191
+ }
192
+ end
193
+ # Stratum ponderation.
194
+ # Symbol: W\<sub>h\</sub>
195
+ def stratum_ponderation(h)
196
+ @strata_sizes[h].to_f / @population_size
197
+ end
198
+ alias_method :wh, :stratum_ponderation
199
+
200
+ # Population mean based on strata
201
+ def mean(field)
202
+ @ms.sum_field(field) {|s_name,vector|
203
+ stratum_ponderation(s_name)*vector.mean
204
+ }
205
+ end
206
+ # Standard error with estimated population variance and without replacement.
207
+ # Source: Cochran (1972)
208
+ def standard_error_wor(field)
209
+ es=@ms.collect_vector(field) {|s_n, vector|
210
+ {'N'=>@strata_sizes[s_n],'n'=>vector.size, 's'=>vector.sds}
211
+ }
212
+
213
+ StratifiedSample.standard_error_esd_wor(es)
214
+ end
215
+
216
+ # Standard error with estimated population variance and without replacement.
217
+ # Source: http://stattrek.com/Lesson6/STRAnalysis.aspx
218
+
219
+ def standard_error_wor_2(field)
220
+ sum=@ms.sum_field(field) {|s_name,vector|
221
+ s_size=@strata_sizes[s_name]
222
+ (s_size**2 * (1-(vector.size.to_f / s_size)) * vector.variance_sample / vector.size.to_f)
223
+ }
224
+ (1/@population_size.to_f)*Math::sqrt(sum)
225
+ end
226
+
227
+ def standard_error_wr(field)
228
+ es=@ms.collect_vector(field) {|s_n, vector|
229
+ {'N'=>@strata_sizes[s_n],'n'=>vector.size, 's'=>vector.sds}
230
+ }
231
+
232
+ StratifiedSample.standard_error_esd_wr(es)
233
+ end
234
+ def proportion_sd_esd_wor(field,v=1)
235
+ es=@ms.collect_vector(field) {|s_n, vector|
236
+ {'N'=>@strata_sizes[s_n],'n'=>vector.size, 'p'=>vector.proportion(v)}
237
+ }
238
+
239
+ StratifiedSample.proportion_sd_esd_wor(es)
240
+ end
241
+
242
+ def proportion_standard_error(field,v=1)
243
+ prop=proportion(field,v)
244
+ sum=@ms.sum_field(field) {|s_name,vector|
245
+ nh=vector.size
246
+ s_size=@strata_sizes[s_name]
247
+ (s_size**2 * (1-(nh/s_size)) * prop * (1-prop) / (nh -1 ))
248
+ }
249
+ (1.quo(@population_size)) * Math::sqrt(sum)
250
+ end
251
+ # Cochran(1971), p. 150
252
+ def variance_pst(field,v=1)
253
+ sum=@ms.datasets.inject(0) {|a,da|
254
+ stratum_name=da[0]
255
+ ds=da[1]
256
+ nh=ds.cases.to_f
257
+ s_size=@strata_sizes[stratum_name]
258
+ prop=ds[field].proportion(v)
259
+ a + (((s_size**2 * (s_size-nh)) / (s_size-1))*(prop*(1-prop) / (nh-1)))
260
+ }
261
+ (1/@population_size.to_f ** 2)*sum
262
+ end
263
+ end
281
264
  end