statsample 0.6.3 → 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -37,12 +37,20 @@ module Statsample
37
37
  @ds=ds
38
38
  @y_var=y_var
39
39
  @n=ds.cases
40
- @fields=ds.fields-[y_var]
41
- @samples_ga=@fields.inject({}){|a,v| a[v]=[];a}
40
+
42
41
  @n_samples=0
43
42
  @alpha=ALPHA
44
43
  @debug=false
45
- @regression_class=Regression::Multiple::RubyEngine
44
+ if y_var.is_a? Array
45
+ @fields=ds.fields-y_var
46
+ @regression_class=Regression::Multiple::MultipleDependent
47
+
48
+ else
49
+ @fields=ds.fields-[y_var]
50
+ @regression_class=Regression::Multiple::MatrixEngine
51
+ end
52
+ @samples_ga=@fields.inject({}){|a,v| a[v]=[];a}
53
+
46
54
  @name=_("Bootstrap dominance Analysis: %s over %s") % [ ds.fields.join(",") , @y_var]
47
55
  opts.each{|k,v|
48
56
  self.send("#{k}=",v) if self.respond_to? k
@@ -52,10 +60,10 @@ module Statsample
52
60
  # lr_class deprecated
53
61
  alias_method :lr_class, :regression_class
54
62
  def da
55
- if @da.nil?
56
- @da=DominanceAnalysis.new(@ds,@y_var, :regression_class => @regression_class)
57
- end
58
- @da
63
+ if @da.nil?
64
+ @da=DominanceAnalysis.new(@ds,@y_var, :regression_class => @regression_class)
65
+ end
66
+ @da
59
67
  end
60
68
  # Creates n re-samples from original dataset and store result of
61
69
  # each sample on @samples_td, @samples_cd, @samples_gd, @samples_ga
@@ -69,6 +77,7 @@ module Statsample
69
77
  puts _("Bootstrap %d of %d") % [t+1, number_samples] if @debug
70
78
  ds_boot=@ds.bootstrap(n)
71
79
  da_1=DominanceAnalysis.new(ds_boot, @y_var, :regression_class => @regression_class)
80
+
72
81
  da_1.total_dominance.each{|k,v|
73
82
  @samples_td[k].push(v)
74
83
  }
@@ -21,22 +21,62 @@ class ::Matrix
21
21
  }
22
22
  GSL::Matrix[*out]
23
23
  end
24
-
24
+
25
25
  # Calculate marginal of rows
26
- def rows_sum
26
+ def row_sum
27
27
  (0...row_size).collect {|i|
28
28
  row(i).to_a.inject(0) {|a,v| a+v}
29
29
  }
30
30
  end
31
31
  # Calculate marginal of columns
32
- def cols_sum
32
+ def column_sum
33
33
  (0...column_size).collect {|i|
34
34
  column(i).to_a.inject(0) {|a,v| a+v}
35
35
  }
36
36
  end
37
+
38
+
39
+ alias :old_par :[]
40
+
41
+ # Select elements and submatrices
42
+ # Implement row, column and minor in one method
43
+ #
44
+ # * [i,j]:: Element i,j
45
+ # * [i,:*]:: Row i
46
+ # * [:*,j]:: Column j
47
+ # * [i1..i2,j]:: Rows i1 to i2, column j
48
+
49
+ def [](*args)
50
+ raise ArgumentError if args.size!=2
51
+ x=args[0]
52
+ y=args[1]
53
+ if x.is_a? Integer and y.is_a? Integer
54
+ @rows[args[0]][args[1]]
55
+ else
56
+ # set ranges according to arguments
57
+
58
+ rx=case x
59
+ when Numeric
60
+ x..x
61
+ when :*
62
+ 0..(row_size-1)
63
+ when Range
64
+ x
65
+ end
66
+ ry=case y
67
+ when Numeric
68
+ y..y
69
+ when :*
70
+ 0..(column_size-1)
71
+ when Range
72
+ y
73
+ end
74
+ Matrix.rows(rx.collect {|i| ry.collect {|j| @rows[i][j]}})
75
+ end
76
+ end
37
77
  # Calculate sum of cells
38
78
  def total_sum
39
- rows_sum.inject(0){|a,v| a+v}
79
+ row_sum.inject(0){|a,v| a+v}
40
80
  end
41
81
  end
42
82
 
@@ -52,26 +92,118 @@ module GSL
52
92
  end
53
93
 
54
94
  module Statsample
55
- attr :labels
56
- attr :name
57
- module CorrelationMatrix
95
+ # Methods for variance/covariance and correlation matrices
96
+ module CovariateMatrix
58
97
  def summary
59
98
  rp=ReportBuilder.new()
60
99
  rp.add(self)
61
100
  rp.to_text
62
101
  end
63
- def labels=(v)
64
- @labels=v
102
+ def type=(v)
103
+ @type=v
104
+ end
105
+ def type
106
+ if row_size.times.find {|i| self[i,i]!=1.0}
107
+ :covariance
108
+ else
109
+ :correlation
110
+ end
111
+
112
+ end
113
+ def correlation
114
+ if(type==:covariance)
115
+ matrix=Matrix.rows(row_size.times.collect { |i|
116
+ column_size.times.collect { |j|
117
+ if i==j
118
+ 1.0
119
+ else
120
+ self[i,j].quo(Math::sqrt(self[i,i])*Math::sqrt(self[j,j]))
121
+ end
122
+ }
123
+ })
124
+ matrix.extend CovariateMatrix
125
+ matrix.fields_x=fields_x
126
+ matrix.fields_y=fields_y
127
+ matrix.type=:correlation
128
+ matrix
129
+ else
130
+ self
131
+ end
132
+ end
133
+ def fields
134
+ raise "Should be square" if !square?
135
+ @fields_x
136
+ end
137
+ def fields=(v)
138
+ raise "Matrix should be square" if !square?
139
+ @fields_x=v
140
+ @fields_y=v
141
+ end
142
+ def fields_x=(v)
143
+ raise "Size of fields != row_size" if v.size!=row_size
144
+ @fields_x=v
145
+ end
146
+ def fields_y=(v)
147
+ raise "Size of fields != column_size" if v.size!=column_size
148
+ @fields_y=v
149
+ end
150
+ def fields_x
151
+ if @fields_x.nil?
152
+ @fields_x=row_size.times.collect {|i| i}
153
+ end
154
+ @fields_x
155
+ end
156
+ def fields_y
157
+ if @fields_y.nil?
158
+ @fields_y=column_size.times.collect {|i| i}
159
+ end
160
+ @fields_y
65
161
  end
162
+
66
163
  def name=(v)
67
164
  @name=v
68
165
  end
166
+ def name
167
+ @name
168
+ end
169
+ # Select a submatrix of factors. You can use labels or indexes to select
170
+ # the factors.
171
+ # If you don't specify columns, they will be the same as the rows
172
+ # Example:
173
+ # a=Matrix[[1.0, 0.3, 0.2], [0.3, 1.0, 0.5], [0.2, 0.5, 1.0]]
174
+ # a.extend CovariateMatrix
175
+ # a.fields=%w{a b c}
176
+ # a.submatrix(%w{c a}, %w{b})
177
+ # => Matrix[[0.5],[0.3]]
178
+ # a.submatrix(%w{c a})
179
+ # => Matrix[[1.0, 0.2] , [0.2, 1.0]]
180
+ def submatrix(rows,columns=nil)
181
+ columns||=rows
182
+ # Convert all labels on index
183
+ row_index=rows.collect {|v|
184
+ v.is_a?(Numeric) ? v : fields_x.index(v)
185
+ }
186
+ column_index=columns.collect {|v|
187
+ v.is_a?(Numeric) ? v : fields_y.index(v)
188
+ }
189
+
190
+
191
+ fx=row_index.collect {|v| fields_x[v]}
192
+ fy=column_index.collect {|v| fields_y[v]}
193
+
194
+ matrix= Matrix.rows(row_index.collect {|i|
195
+ row=column_index.collect {|j| self[i,j]}})
196
+ matrix.extend CovariateMatrix
197
+ matrix.fields_x=fx
198
+ matrix.fields_y=fy
199
+ matrix.type=type
200
+ matrix
201
+ end
69
202
  def to_reportbuilder(generator)
70
- @name||="Correlation Matrix"
71
- @labels||=row_size.times.collect {|i| i.to_s}
72
- t=ReportBuilder::Table.new(:name=>@name, :header=>[""]+@labels)
203
+ @name||= (type==:correlation ? "Correlation":"Covariance")+" Matrix"
204
+ t=ReportBuilder::Table.new(:name=>@name, :header=>[""]+fields_y)
73
205
  row_size.times {|i|
74
- t.add_row([@labels[i]]+@rows[i].collect {|i| sprintf("%0.3f",i).gsub("0.",".")})
206
+ t.add_row([fields_x[i]]+@rows[i].collect {|i1| sprintf("%0.3f",i1).gsub("0.",".")})
75
207
  }
76
208
  generator.parse_element(t)
77
209
  end
@@ -1,54 +1,54 @@
1
1
  module Statsample
2
- # Multiset joins multiple dataset with the same fields and vectors
3
- # but with different number of cases.
4
- # This is the base class for stratified and cluster sampling estimation
5
- class Multiset
6
- attr_reader :fields, :datasets
7
- # To create a multiset
8
- # * Multiset.new(%w{f1 f2 f3}) # define only fields
9
- def initialize(fields)
10
- @fields=fields
11
- @datasets={}
12
- end
13
- def self.new_empty_vectors(fields,ds_names)
14
- ms=Multiset.new(fields)
15
- ds_names.each{|d|
16
- ms.add_dataset(d,Dataset.new(fields))
17
- }
18
- ms
19
- end
20
- def datasets_names
21
- @datasets.keys.sort
22
- end
23
- def n_datasets
24
- @datasets.size
25
- end
26
- def add_dataset(key,ds)
27
- if(ds.fields!=@fields)
28
- raise ArgumentError, "Dataset(#{ds.fields.to_s})must have the same fields of the Multiset(#{@fields})"
29
- else
30
- @datasets[key]=ds
31
- end
32
- end
33
- def sum_field(field)
34
- @datasets.inject(0) {|a,da|
35
- stratum_name=da[0]
36
- vector=da[1][field]
37
- val=yield stratum_name,vector
38
- a+val
39
- }
40
- end
41
- def collect_vector(field)
42
- @datasets.collect {|k,v|
43
- yield k, v[field]
44
- }
45
- end
46
- def[](i)
47
- @datasets[i]
2
+ # Multiset joins multiple datasets with the same fields and vectors
3
+ # but with different number of cases.
4
+ # This is the base class for stratified and cluster sampling estimation
5
+ class Multiset
6
+ attr_reader :fields, :datasets
7
+ # To create a multiset
8
+ # * Multiset.new(%w{f1 f2 f3}) # define only fields
9
+ def initialize(fields)
10
+ @fields=fields
11
+ @datasets={}
12
+ end
13
+ def self.new_empty_vectors(fields,ds_names)
14
+ ms=Multiset.new(fields)
15
+ ds_names.each{|d|
16
+ ms.add_dataset(d,Dataset.new(fields))
17
+ }
18
+ ms
19
+ end
20
+ def datasets_names
21
+ @datasets.keys.sort
22
+ end
23
+ def n_datasets
24
+ @datasets.size
25
+ end
26
+ def add_dataset(key,ds)
27
+ if(ds.fields!=@fields)
28
+ raise ArgumentError, "Dataset(#{ds.fields.to_s})must have the same fields of the Multiset(#{@fields})"
29
+ else
30
+ @datasets[key]=ds
48
31
  end
49
32
  end
33
+ def sum_field(field)
34
+ @datasets.inject(0) {|a,da|
35
+ stratum_name=da[0]
36
+ vector=da[1][field]
37
+ val=yield stratum_name,vector
38
+ a+val
39
+ }
40
+ end
41
+ def collect_vector(field)
42
+ @datasets.collect {|k,v|
43
+ yield k, v[field]
44
+ }
45
+ end
46
+ def[](i)
47
+ @datasets[i]
48
+ end
49
+ end
50
50
  class StratifiedSample
51
- class << self
51
+ class << self
52
52
  # mean for an array of vectors
53
53
  def mean(*vectors)
54
54
  n_total=0
@@ -59,223 +59,206 @@ module Statsample
59
59
  means.to_f/n_total
60
60
  end
61
61
 
62
- def standard_error_ksd_wr(es)
63
- n_total=0
64
- sum=es.inject(0){|a,h|
65
- n_total+=h['N']
66
- a+((h['N']**2 * h['s']**2) / h['n'].to_f)
67
- }
68
- (1.to_f / n_total)*Math::sqrt(sum)
69
- end
70
-
71
-
72
- def variance_ksd_wr(es)
73
- standard_error_ksd_wr(es)**2
74
- end
75
-
76
- # Source : Cochran (1972)
77
-
78
- def variance_ksd_wor(es)
79
- n_total=es.inject(0) {|a,h|
80
- a+h['N']
81
- }
82
- es.inject(0){|a,h|
83
- val=((h['N'].to_f / n_total)**2) * (h['s']**2 / h['n'].to_f) * (1 - (h['n'].to_f / h['N']))
84
- a+val
85
- }
86
- end
87
- def standard_error_ksd_wor(es)
88
- Math::sqrt(variance_ksd_wor(es))
89
- end
90
-
91
-
92
-
93
- def variance_esd_wor(es)
94
- n_total=es.inject(0) {|a,h|
95
- a+h['N']
96
- }
97
-
98
- sum=es.inject(0){|a,h|
99
- val=h['N']*(h['N']-h['n'])*(h['s']**2 / h['n'].to_f)
100
- a+val
101
- }
102
- (1.0/(n_total**2))*sum
103
- end
104
-
105
-
106
- def standard_error_esd_wor(es)
107
- Math::sqrt(variance_ksd_wor(es))
108
- end
109
- # Based on http://stattrek.com/Lesson6/STRAnalysis.aspx
110
- def variance_esd_wr(es)
111
- n_total=es.inject(0) {|a,h|
112
- a+h['N']
113
- }
114
-
115
- sum=es.inject(0){|a,h|
116
- val= ((h['s']**2 * h['N']**2) / h['n'].to_f)
117
- a+val
118
- }
119
- (1.0/(n_total**2))*sum
120
- end
121
- def standard_error_esd_wr(es)
122
- Math::sqrt(variance_esd_wr(es))
123
- end
124
-
125
- def proportion_variance_ksd_wor(es)
126
- n_total=es.inject(0) {|a,h|
127
- a+h['N']
128
- }
129
-
130
- es.inject(0){|a,h|
131
- val= (((h['N'].to_f / n_total)**2 * h['p']*(1-h['p'])) / (h['n'])) * (1- (h['n'].to_f / h['N']))
132
- a+val
133
- }
134
- end
135
- def proportion_sd_ksd_wor(es)
136
- Math::sqrt(proportion_variance_ksd_wor(es))
137
- end
138
-
139
-
140
- def proportion_sd_ksd_wr(es)
141
- n_total=es.inject(0) {|a,h|
142
- a+h['N']
143
- }
144
-
145
- sum=es.inject(0){|a,h|
146
- val= (h['N']**2 * h['p']*(1-h['p'])) / h['n'].to_f
147
- a+val
148
- }
149
- Math::sqrt(sum) * (1.0/n_total)
150
- end
151
- def proportion_variance_ksd_wr(es)
152
- proportion_variance_ksd_wor(es)**2
153
- end
154
-
155
- def proportion_variance_esd_wor(es)
156
- n_total=es.inject(0) {|a,h|
157
- a+h['N']
158
- }
159
-
160
- sum=es.inject(0){|a,h|
161
- a=(h['N']**2 * (h['N']-h['n']) * h['p']*(1.0-h['p'])) / ((h['n']-1)*(h['N']-1))
162
- a+val
163
- }
164
- Math::sqrt(sum) * (1.0/n_total**2)
165
- end
166
- def proportion_sd_esd_wor(es)
167
- Math::sqrt(proportion_variance_ksd_wor(es))
168
- end
169
-
170
-
171
-
62
+ def standard_error_ksd_wr(es)
63
+ n_total=0
64
+ sum=es.inject(0){|a,h|
65
+ n_total+=h['N']
66
+ a+((h['N']**2 * h['s']**2) / h['n'].to_f)
67
+ }
68
+ (1.to_f / n_total)*Math::sqrt(sum)
69
+ end
70
+
71
+
72
+ def variance_ksd_wr(es)
73
+ standard_error_ksd_wr(es)**2
74
+ end
75
+ def calculate_n_total(es)
76
+ es.inject(0) {|a,h| a+h['N'] }
77
+ end
78
+ # Source : Cochran (1972)
79
+
80
+ def variance_ksd_wor(es)
81
+ n_total=calculate_n_total(es)
82
+ es.inject(0){|a,h|
83
+ val=((h['N'].to_f / n_total)**2) * (h['s']**2 / h['n'].to_f) * (1 - (h['n'].to_f / h['N']))
84
+ a+val
85
+ }
86
+ end
87
+ def standard_error_ksd_wor(es)
88
+ Math::sqrt(variance_ksd_wor(es))
89
+ end
90
+
91
+
92
+
93
+ def variance_esd_wor(es)
94
+ n_total=calculate_n_total(es)
95
+ sum=es.inject(0){|a,h|
96
+ val=h['N']*(h['N']-h['n'])*(h['s']**2 / h['n'].to_f)
97
+ a+val
98
+ }
99
+ (1.0/(n_total**2))*sum
100
+ end
101
+
102
+
103
+ def standard_error_esd_wor(es)
104
+ Math::sqrt(variance_ksd_wor(es))
105
+ end
106
+ # Based on http://stattrek.com/Lesson6/STRAnalysis.aspx
107
+ def variance_esd_wr(es)
108
+ n_total=calculate_n_total(es)
109
+ sum=es.inject(0){|a,h|
110
+ val= ((h['s']**2 * h['N']**2) / h['n'].to_f)
111
+ a+val
112
+ }
113
+ (1.0/(n_total**2))*sum
114
+ end
115
+ def standard_error_esd_wr(es)
116
+ Math::sqrt(variance_esd_wr(es))
117
+ end
118
+
119
+ def proportion_variance_ksd_wor(es)
120
+ n_total=calculate_n_total(es)
121
+ es.inject(0){|a,h|
122
+ val= (((h['N'].to_f / n_total)**2 * h['p']*(1-h['p'])) / (h['n'])) * (1- (h['n'].to_f / h['N']))
123
+ a+val
124
+ }
125
+ end
126
+ def proportion_sd_ksd_wor(es)
127
+ Math::sqrt(proportion_variance_ksd_wor(es))
128
+ end
129
+
130
+
131
+ def proportion_sd_ksd_wr(es)
132
+ n_total=calculate_n_total(es)
133
+ sum=es.inject(0){|a,h|
134
+ val= (h['N']**2 * h['p']*(1-h['p'])) / h['n'].to_f
135
+ a+val
136
+ }
137
+ Math::sqrt(sum) * (1.0/n_total)
138
+ end
139
+ def proportion_variance_ksd_wr(es)
140
+ proportion_variance_ksd_wor(es)**2
141
+ end
142
+
143
+ def proportion_variance_esd_wor(es)
144
+ n_total=n_total=calculate_n_total(es)
145
+
146
+ sum=es.inject(0){|a,h|
147
+ a=(h['N']**2 * (h['N']-h['n']) * h['p']*(1.0-h['p'])) / ((h['n']-1)*(h['N']-1))
148
+ a+val
149
+ }
150
+ Math::sqrt(sum) * (1.0/n_total**2)
151
+ end
152
+ def proportion_sd_esd_wor(es)
153
+ Math::sqrt(proportion_variance_ksd_wor(es))
154
+ end
172
155
  end
173
- def initialize(ms,strata_sizes)
174
- raise TypeError,"ms should be a Multiset" unless ms.is_a? Statsample::Multiset
175
- @ms=ms
176
- raise ArgumentError,"You should put a strata size for each dataset" if strata_sizes.keys.sort!=ms.datasets_names
177
- @strata_sizes=strata_sizes
178
- @population_size=@strata_sizes.inject(0) {|a,x| a+x[1]}
179
- @strata_number=@ms.n_datasets
180
- @sample_size=@ms.datasets.inject(0) {|a,x| a+x[1].cases}
181
- end
182
- # Number of strata
183
- def strata_number
184
- @strata_number
185
- end
186
- # Population size. Equal to sum of strata sizes
187
- # Symbol: N<sub>h</sub>
188
- def population_size
189
- @population_size
190
- end
191
- # Sample size. Equal to sum of sample of each stratum
192
- def sample_size
193
- @sample_size
194
- end
195
- # Size of stratum x
196
- def stratum_size(h)
197
- @strata_sizes[h]
198
- end
199
- def vectors_by_field(field)
200
- @ms.datasets.collect{|k,ds|
201
- ds[field]
202
- }
203
- end
204
- # Population proportion based on strata
205
- def proportion(field, v=1)
206
- @ms.sum_field(field) {|s_name,vector|
207
- stratum_ponderation(s_name)*vector.proportion(v)
208
- }
209
- end
210
- # Stratum ponderation.
211
- # Symbol: W\<sub>h\</sub>
212
- def stratum_ponderation(h)
213
- @strata_sizes[h].to_f / @population_size
214
- end
215
- alias_method :wh, :stratum_ponderation
216
-
217
- # Population mean based on strata
218
- def mean(field)
219
- @ms.sum_field(field) {|s_name,vector|
220
- stratum_ponderation(s_name)*vector.mean
221
- }
222
- end
223
- # Standard error with estimated population variance and without replacement.
224
- # Source: Cochran (1972)
225
- def standard_error_wor(field)
226
- es=@ms.collect_vector(field) {|s_n, vector|
227
- {'N'=>@strata_sizes[s_n],'n'=>vector.size, 's'=>vector.sds}
228
- }
229
-
230
- StratifiedSample.standard_error_esd_wor(es)
231
- end
232
-
233
- # Standard error with estimated population variance and without replacement.
234
- # Source: http://stattrek.com/Lesson6/STRAnalysis.aspx
235
-
236
- def standard_error_wor_2(field)
237
- sum=@ms.sum_field(field) {|s_name,vector|
238
- s_size=@strata_sizes[s_name]
239
- (s_size**2 * (1-(vector.size.to_f / s_size)) * vector.variance_sample / vector.size.to_f)
240
- }
241
- (1/@population_size.to_f)*Math::sqrt(sum)
242
- end
243
-
244
- def standard_error_wr(field)
245
- es=@ms.collect_vector(field) {|s_n, vector|
246
- {'N'=>@strata_sizes[s_n],'n'=>vector.size, 's'=>vector.sds}
247
- }
248
-
249
- StratifiedSample.standard_error_esd_wr(es)
250
- end
251
- def proportion_sd_esd_wor(field,v=1)
252
- es=@ms.collect_vector(field) {|s_n, vector|
253
- {'N'=>@strata_sizes[s_n],'n'=>vector.size, 'p'=>vector.proportion(v)}
254
- }
255
-
256
- StratifiedSample.proportion_sd_esd_wor(es)
257
- end
258
-
259
- def proportion_standard_error(field,v=1)
260
- prop=proportion(field,v)
261
- sum=@ms.sum_field(field) {|s_name,vector|
262
- nh=vector.size
263
- s_size=@strata_sizes[s_name]
264
- (s_size**2 * (1-(nh/s_size)) * prop * (1-prop) / (nh -1 ))
265
- }
266
- (1.quo(@population_size)) * Math::sqrt(sum)
267
- end
268
- # Cochran(1971), p. 150
269
- def variance_pst(field,v=1)
270
- sum=@ms.datasets.inject(0) {|a,da|
271
- stratum_name=da[0]
272
- ds=da[1]
273
- nh=ds.cases.to_f
274
- s_size=@strata_sizes[stratum_name]
275
- prop=ds[field].proportion(v)
276
- a + (((s_size**2 * (s_size-nh)) / (s_size-1))*(prop*(1-prop) / (nh-1)))
277
- }
278
- (1/@population_size.to_f ** 2)*sum
279
- end
280
- end
156
+ def initialize(ms,strata_sizes)
157
+ raise TypeError,"ms should be a Multiset" unless ms.is_a? Statsample::Multiset
158
+ @ms=ms
159
+ raise ArgumentError,"You should put a strata size for each dataset" if strata_sizes.keys.sort!=ms.datasets_names
160
+ @strata_sizes=strata_sizes
161
+ @population_size=@strata_sizes.inject(0) {|a,x| a+x[1]}
162
+ @strata_number=@ms.n_datasets
163
+ @sample_size=@ms.datasets.inject(0) {|a,x| a+x[1].cases}
164
+ end
165
+ # Number of strata
166
+ def strata_number
167
+ @strata_number
168
+ end
169
+ # Population size. Equal to sum of strata sizes
170
+ # Symbol: N<sub>h</sub>
171
+ def population_size
172
+ @population_size
173
+ end
174
+ # Sample size. Equal to sum of sample of each stratum
175
+ def sample_size
176
+ @sample_size
177
+ end
178
+ # Size of stratum h
179
+ def stratum_size(h)
180
+ @strata_sizes[h]
181
+ end
182
+ def vectors_by_field(field)
183
+ @ms.datasets.collect{|k,ds|
184
+ ds[field]
185
+ }
186
+ end
187
+ # Population proportion based on strata
188
+ def proportion(field, v=1)
189
+ @ms.sum_field(field) {|s_name,vector|
190
+ stratum_ponderation(s_name)*vector.proportion(v)
191
+ }
192
+ end
193
+ # Stratum ponderation.
194
+ # Symbol: W\<sub>h\</sub>
195
+ def stratum_ponderation(h)
196
+ @strata_sizes[h].to_f / @population_size
197
+ end
198
+ alias_method :wh, :stratum_ponderation
199
+
200
+ # Population mean based on strata
201
+ def mean(field)
202
+ @ms.sum_field(field) {|s_name,vector|
203
+ stratum_ponderation(s_name)*vector.mean
204
+ }
205
+ end
206
+ # Standard error with estimated population variance and without replacement.
207
+ # Source: Cochran (1972)
208
+ def standard_error_wor(field)
209
+ es=@ms.collect_vector(field) {|s_n, vector|
210
+ {'N'=>@strata_sizes[s_n],'n'=>vector.size, 's'=>vector.sds}
211
+ }
212
+
213
+ StratifiedSample.standard_error_esd_wor(es)
214
+ end
215
+
216
+ # Standard error with estimated population variance and without replacement.
217
+ # Source: http://stattrek.com/Lesson6/STRAnalysis.aspx
218
+
219
+ def standard_error_wor_2(field)
220
+ sum=@ms.sum_field(field) {|s_name,vector|
221
+ s_size=@strata_sizes[s_name]
222
+ (s_size**2 * (1-(vector.size.to_f / s_size)) * vector.variance_sample / vector.size.to_f)
223
+ }
224
+ (1/@population_size.to_f)*Math::sqrt(sum)
225
+ end
226
+
227
+ def standard_error_wr(field)
228
+ es=@ms.collect_vector(field) {|s_n, vector|
229
+ {'N'=>@strata_sizes[s_n],'n'=>vector.size, 's'=>vector.sds}
230
+ }
231
+
232
+ StratifiedSample.standard_error_esd_wr(es)
233
+ end
234
+ def proportion_sd_esd_wor(field,v=1)
235
+ es=@ms.collect_vector(field) {|s_n, vector|
236
+ {'N'=>@strata_sizes[s_n],'n'=>vector.size, 'p'=>vector.proportion(v)}
237
+ }
238
+
239
+ StratifiedSample.proportion_sd_esd_wor(es)
240
+ end
241
+
242
+ def proportion_standard_error(field,v=1)
243
+ prop=proportion(field,v)
244
+ sum=@ms.sum_field(field) {|s_name,vector|
245
+ nh=vector.size
246
+ s_size=@strata_sizes[s_name]
247
+ (s_size**2 * (1-(nh/s_size)) * prop * (1-prop) / (nh -1 ))
248
+ }
249
+ (1.quo(@population_size)) * Math::sqrt(sum)
250
+ end
251
+ # Cochran(1971), p. 150
252
+ def variance_pst(field,v=1)
253
+ sum=@ms.datasets.inject(0) {|a,da|
254
+ stratum_name=da[0]
255
+ ds=da[1]
256
+ nh=ds.cases.to_f
257
+ s_size=@strata_sizes[stratum_name]
258
+ prop=ds[field].proportion(v)
259
+ a + (((s_size**2 * (s_size-nh)) / (s_size-1))*(prop*(1-prop) / (nh-1)))
260
+ }
261
+ (1/@population_size.to_f ** 2)*sum
262
+ end
263
+ end
281
264
  end