statsample 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +79 -0
- data/Manifest.txt +56 -0
- data/README.txt +77 -0
- data/Rakefile +22 -0
- data/bin/statsample +2 -0
- data/demo/benchmark.rb +52 -0
- data/demo/chi-square.rb +44 -0
- data/demo/dice.rb +13 -0
- data/demo/distribution_t.rb +95 -0
- data/demo/graph.rb +9 -0
- data/demo/item_analysis.rb +30 -0
- data/demo/mean.rb +81 -0
- data/demo/proportion.rb +57 -0
- data/demo/sample_test.csv +113 -0
- data/demo/strata_proportion.rb +152 -0
- data/demo/stratum.rb +141 -0
- data/lib/spss.rb +131 -0
- data/lib/statsample.rb +216 -0
- data/lib/statsample/anova.rb +74 -0
- data/lib/statsample/bivariate.rb +255 -0
- data/lib/statsample/chidistribution.rb +39 -0
- data/lib/statsample/codification.rb +120 -0
- data/lib/statsample/converters.rb +338 -0
- data/lib/statsample/crosstab.rb +122 -0
- data/lib/statsample/dataset.rb +526 -0
- data/lib/statsample/dominanceanalysis.rb +259 -0
- data/lib/statsample/dominanceanalysis/bootstrap.rb +126 -0
- data/lib/statsample/graph/gdchart.rb +45 -0
- data/lib/statsample/graph/svgboxplot.rb +108 -0
- data/lib/statsample/graph/svggraph.rb +181 -0
- data/lib/statsample/graph/svghistogram.rb +208 -0
- data/lib/statsample/graph/svgscatterplot.rb +111 -0
- data/lib/statsample/htmlreport.rb +232 -0
- data/lib/statsample/multiset.rb +281 -0
- data/lib/statsample/regression.rb +522 -0
- data/lib/statsample/reliability.rb +235 -0
- data/lib/statsample/resample.rb +20 -0
- data/lib/statsample/srs.rb +159 -0
- data/lib/statsample/test.rb +25 -0
- data/lib/statsample/vector.rb +759 -0
- data/test/_test_chart.rb +58 -0
- data/test/test_anova.rb +31 -0
- data/test/test_codification.rb +59 -0
- data/test/test_crosstab.rb +55 -0
- data/test/test_csv.csv +7 -0
- data/test/test_csv.rb +27 -0
- data/test/test_dataset.rb +293 -0
- data/test/test_ggobi.rb +42 -0
- data/test/test_multiset.rb +98 -0
- data/test/test_regression.rb +108 -0
- data/test/test_reliability.rb +32 -0
- data/test/test_resample.rb +23 -0
- data/test/test_srs.rb +14 -0
- data/test/test_statistics.rb +152 -0
- data/test/test_stratified.rb +19 -0
- data/test/test_svg_graph.rb +63 -0
- data/test/test_vector.rb +265 -0
- data/test/test_xls.rb +32 -0
- metadata +158 -0
| @@ -0,0 +1,232 @@ | |
| 1 | 
            +
            require 'statsample/graph/svggraph'
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module Statsample
         | 
| 4 | 
            +
                # Accumulates HTML fragments ("partials") and an index of anchors,
                # writes the SVG graphs it is given into an output directory, and
                # renders everything as a single HTML page.
                class HtmlReport
                    # name:: report title, used in <title> and <h1>.
                    # dir::  directory where SVG files are written; defaults to
                    #        "<name>/". It is created when it does not exist yet.
                    def initialize(name, dir=nil)
                        require 'fileutils'
                        @uniq = 1        # next anchor id
                        @uniq_file = 0   # counter used by #uniq_file
                        @name = name
                        @partials = []
                        @anchors = []
                        @dir = dir || (@name + "/")
                        @level = 1       # nesting level recorded for new anchors
                        @c_level = 1     # nesting level currently open while rendering the index
                        # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
                        FileUtils.mkdir_p(@dir) unless File.exist?(@dir)
                    end

                    # Registers an index entry called +name+ and appends +summary+
                    # (an HTML string, inserted verbatim) as a new section.
                    def add_summary(name, summary)
                        add_anchor(name)
                        @partials.push(summary)
                    end

                    # Records [name, current level, id] for the index and appends the
                    # matching <a name='id'> target as its own section.
                    def add_anchor(name)
                        @anchors.push([name, @level, @uniq])
                        @partials.push("<a name='#{@uniq}'> </a>")
                        @uniq += 1
                    end

                    # Returns a file base name "<prepend>_<counter>_<epoch seconds>".
                    # The counter is incremented on every call.
                    def uniq_file(prepend="file")
                        @uniq_file += 1
                        "#{prepend}_#{@uniq_file}_#{Time.now.to_i}"
                    end

                    # Appends a table with the correlation matrix of +ds+. Each cell
                    # shows the coefficient, a significance mark (** for p<0.01,
                    # * for p<0.05) and the probability; marked cells are bold.
                    def add_correlation_matrix(ds)
                        add_anchor("Correlation Matrix")
                        matrix = Statsample::Bivariate.correlation_matrix(ds)
                        pmatrix = Statsample::Bivariate.correlation_probability_matrix(ds)
                        # Header cells are wrapped in a <tr>, which <thead> requires.
                        html = "<h2>Correlation Matrix</h2> <table><thead><tr><th>-</th><th>#{ds.fields.join("</th><th>")}</th></tr> </thead> <tbody>"
                        matrix.row_size.times do |row|
                            html << "<tr><td>#{ds.fields[row]}</td>"
                            matrix.column_size.times do |col|
                                html << correlation_cell(matrix[row, col], pmatrix[row, col])
                            end
                            html << "</tr>"
                        end
                        html << "</tbody></table>"
                        @partials.push(html)
                    end

                    # Adds a scale section: item analysis summary plus histogram,
                    # run-sequence plot and normal probability plot of the mean vector.
                    # ds::     dataset containing the items
                    # name::   name of the scale
                    # fields:: fields of +ds+ that make up the scale (must not be empty)
                    # icc::    when true, also adds the item characteristic curves
                    def add_scale(ds, name, fields, icc=false)
                        raise "Fields are empty" if fields.size==0
                        add_anchor("Scale:#{name}")

                        ds_partial = ds.dup(fields)
                        ia = Statsample::Reliability::ItemAnalysis.new(ds_partial)
                        @partials.push("<h2>Scale: #{name}</h2>#{ia.html_summary}")
                        # Graphs are indexed one level below the scale itself.
                        @level += 1
                        v = ds_partial.vector_mean
                        add_histogram(name, v)
                        add_runsequence_plot(name, v)
                        add_normalprobability_plot(name, v)
                        # add_icc needs the dataset as well; the original call passed
                        # only (name, fields) and raised ArgumentError.
                        add_icc(name, ds, fields) if icc
                        @level -= 1
                    end

                    # Adds a box plot of +vector+.
                    def add_boxplot(name, vector, options={})
                        add_graph("Box Plot #{name}", name, vector.svggraph_boxplot(options))
                    end

                    # Writes +graph+ (anything responding to #burn, #width and #height)
                    # to a new SVG file inside the report directory and appends an
                    # <embed> section pointing to it. +id+ is accepted but not used.
                    def add_graph(name, id, graph)
                        add_anchor(name)
                        # File.join avoids the doubled separator that plain
                        # concatenation produced with the default "<name>/" directory.
                        rs_file = File.join(@dir, "#{uniq_file()}.svg")
                        html = "<h3>#{name}</h3> <p><embed src='#{rs_file}'  width='#{graph.width}' height='#{graph.height}' type='image/svg+xml' /></p>\n"
                        File.open(rs_file, "w") { |f| f.puts(graph.burn) }
                        @partials.push(html)
                    end

                    # Adds a run-sequence plot of +vector+.
                    def add_runsequence_plot(name, vector, options={})
                        add_graph("Run-Sequence Plot #{name}", name, vector.svggraph_runsequence_plot(options))
                    end

                    # Adds a lag plot of +vector+.
                    def add_lag_plot(name, vector, options={})
                        add_graph("Lag Plot #{name}", name, vector.svggraph_lag_plot(options))
                    end

                    # Adds a normal probability plot of +vector+.
                    def add_normalprobability_plot(name, vector, options={})
                        add_graph("Normal Probability Plot #{name}", name, vector.svggraph_normalprobability_plot(options))
                    end

                    # Adds a scatterplot of +x_field+ against +y_fields+ of +ds+.
                    # x_field defaults to the first field, y_fields to all the others.
                    def add_scatterplot(name, ds, x_field=nil, y_fields=nil, config={})
                        add_anchor("Scatterplot: #{name}")
                        x_field ||= ds.fields[0]
                        y_fields ||= ds.fields - [x_field]
                        ds_partial = ds.dup([x_field] + y_fields)
                        sc = Statsample::Graph::SvgScatterplot.new(ds_partial, config)
                        sc.parse
                        sc_file = File.join(@dir, "#{uniq_file("sc")}.svg")
                        html = "<h3>Scatterplot #{name}</h3> <p><embed src='#{sc_file}'  width='#{sc.width}' height='#{sc.height}' type='image/svg+xml' /></p>\n"
                        File.open(sc_file, "w") { |f| f.puts(sc.burn) }
                        @partials.push(html)
                    end

                    # Adds one graph with a box plot for every field of +ds+ and
                    # returns the graph object.
                    # NOTE(review): this registers two index entries (one here, one
                    # inside add_graph) - confirm whether that is intended.
                    def add_boxplots(name, ds, options={})
                        add_anchor("Boxplots: #{name}")
                        defaults = {:graph_title=>"Boxplots:#{name}", :show_graph_title=>true, :height=>500}
                        graph = Statsample::Graph::SvgBoxplot.new(defaults.merge(options))
                        ds.fields.each do |f|
                            graph.add_data(:title=>f, :data=>ds[f].valid_data, :vector=>ds[f])
                        end
                        add_graph(name, name, graph)
                        graph
                    end

                    # Adds a histogram of +vector+ followed by its skewness and kurtosis.
                    # bins:: number of bins; defaults to vector.size / 15 and is always
                    #        kept between 1 and 15.
                    def add_histogram(name, vector, bins=nil, options={})
                        bins ||= vector.size / 15
                        bins = 15 if bins > 15
                        # Vectors with fewer than 15 cases used to yield 0 bins.
                        bins = 1 if bins < 1
                        graph = vector.svggraph_histogram(bins, options)
                        add_graph("Histogram:#{name}", name, graph)
                        html = "<ul><li>Skewness=#{sprintf("%0.3f",vector.skew)}</li>
                    <li>Kurtosis=#{sprintf("%0.3f",vector.kurtosis)}</li></ul>"
                        @partials.push(html)
                    end

                    # Adds the item characteristic curves for +fields+ of +ds+.
                    # The SVG files are produced by ItemAnalysis in the report
                    # directory; this method only embeds them, one per field.
                    def add_icc(name, ds, fields)
                        raise "Fields are empty" if fields.size==0
                        add_anchor("ICC:#{name}")
                        ds_partial = ds.dup(fields)
                        ia = Statsample::Reliability::ItemAnalysis.new(ds_partial)
                        html = "<h3>ICC for scale: #{name}</h3>"
                        ia.svggraph_item_characteristic_curve(@dir, name, {:width=>400, :height=>300})
                        ds_partial.fields.sort.each do |f|
                            html << "<div><p><strong>#{f}</strong></p><embed src='#{@dir}/#{name}_#{f}.svg'  width='400' height='300' type='image/svg+xml' /></div>\n"
                        end
                        @partials.push(html)
                    end

                    # Stylesheet embedded in the <style> element of the report.
                    def css
                        <<~HERE
                        table {
                          border-collapse: collapse;
                        }
                        th {
                          text-align: left;
                          padding-right: 1em;
                          border-bottom: 3px solid #ccc;
                        }
                        th.active img {
                          display: inline;
                        }
                        tr.even, tr.odd {
                          background-color: #eee;
                          border-bottom: 1px solid #ccc;
                        }
                        tr.even, tr.odd {
                          padding: 0.1em 0.6em;
                        }
                        td.active {
                          background-color: #ddd;
                        }
                        table td {
                        border:1px solid #aaa;
                        }
                        table tr.line td{
                        border-top: 2px solid black;
                        }

                        HERE
                    end

                    # Returns the <ul> / </ul> tags needed to move the index from the
                    # currently open level (@c_level) to +level+; "" when they match.
                    def create_uls(level)
                        if level > @c_level
                            "<ul>\n" * (level - @c_level)
                        elsif level < @c_level
                            "</ul>\n" * (@c_level - level)
                        else
                            ""
                        end
                    end

                    # Renders the whole report (index plus every section) as an HTML string.
                    def parse
                        html = "<html><head><title>#{@name}</title><style>#{css()}</style></head><body><h1>Report: #{@name}</h1>"
                        if @anchors.size > 0
                            # The index div now wraps the list, so the closing </div>
                            # below has a matching opening tag.
                            html << "<div class='index'>Index<ul>"
                            @c_level = 1
                            @anchors.each do |name, level, uniq|
                                html << create_uls(level)
                                @c_level = level
                                html << "<li><a href='##{uniq}'>#{name}</a></li>"
                            end
                            # Close any nested lists still open.
                            html << create_uls(1)
                            html << "</ul></div>"
                        end
                        html << "<div class='section'>" << @partials.join("</div><div class='section'>") << "</div>"
                        html << "</body></html>"
                        html
                    end

                    # Writes the rendered report to +filename+.
                    def save(filename)
                        File.open(filename, "w") { |fp| fp.write(parse) }
                    end

                    private

                    # Builds one <td> of the correlation table from a coefficient and
                    # its probability (either may be nil).
                    def correlation_cell(coefficient, probability)
                        return "<td>--</td>" if coefficient.nil?
                        sig = ""
                        prob_out = ""
                        unless probability.nil?
                            prob_out = sprintf("%0.3f", probability)
                            if probability < 0.01
                                sig = "**"
                            elsif probability < 0.05
                                sig = "*"
                            end
                        end
                        content = "#{sprintf("%0.3f", coefficient)} #{sig}<br /> #{prob_out}"
                        sig.empty? ? "<td>#{content}</td>" : "<td><strong>#{content}</strong></td>"
                    end
                end
         | 
| 232 | 
            +
            end
         | 
| @@ -0,0 +1,281 @@ | |
| 1 | 
            +
            module Statsample
         | 
| 2 | 
            +
                # Multiset joins multiple datasets with the same fields and vectors
         | 
| 3 | 
            +
                # but with different number of cases. 
         | 
| 4 | 
            +
                # This is the base class for stratified and cluster sampling estimation
         | 
| 5 | 
            +
                class Multiset
                    attr_reader :fields, :datasets

                    # Creates an empty multiset for the given field names:
                    # * Multiset.new(%w{f1 f2 f3}) # define only fields
                    def initialize(fields)
                        @fields = fields
                        @datasets = {}
                    end

                    # Builds a multiset holding one new Dataset (created with
                    # +fields+) for every name in +ds_names+.
                    def self.new_empty_vectors(fields, ds_names)
                        multiset = Multiset.new(fields)
                        ds_names.each do |ds_name|
                            multiset.add_dataset(ds_name, Dataset.new(fields))
                        end
                        multiset
                    end

                    # Sorted list of the keys under which datasets were added.
                    def datasets_names
                        @datasets.keys.sort
                    end

                    # Number of datasets stored.
                    def n_datasets
                        @datasets.size
                    end

                    # Stores +ds+ under +key+. Raises ArgumentError when the fields
                    # of +ds+ differ from the fields of the multiset.
                    def add_dataset(key, ds)
                        unless ds.fields == @fields
                            raise ArgumentError, "Dataset(#{ds.fields.to_s})must have the same fields of the Multiset(#{@fields})"
                        end
                        @datasets[key] = ds
                    end

                    # Yields (dataset key, dataset[field]) for every dataset and
                    # returns the sum of the block results, starting from 0.
                    def sum_field(field)
                        total = 0
                        @datasets.each do |stratum_name, dataset|
                            total = total + yield(stratum_name, dataset[field])
                        end
                        total
                    end

                    # Yields (dataset key, dataset[field]) for every dataset and
                    # returns the block results as an array.
                    def collect_vector(field)
                        @datasets.map do |key, dataset|
                            yield key, dataset[field]
                        end
                    end

                    # Dataset stored under key +i+, or nil.
                    def [](i)
                        @datasets[i]
                    end
                end
         | 
| 50 | 
            +
                class StratifiedSample
         | 
| 51 | 
            +
            		class << self
         | 
| 52 | 
            +
            			# mean for an array of vectors
         | 
| 53 | 
            +
            			def mean(*v)
         | 
| 54 | 
            +
            				n_total=0
         | 
| 55 | 
            +
            				a=v.inject(0){|a,v|
         | 
| 56 | 
            +
            					n_total+=v.size
         | 
| 57 | 
            +
            					a+v.sum
         | 
| 58 | 
            +
            				}
         | 
| 59 | 
            +
            				a.to_f/n_total
         | 
| 60 | 
            +
            			end
         | 
| 61 | 
            +
             | 
| 62 | 
            +
                        def standard_error_ksd_wr(es)
         | 
| 63 | 
            +
                            n_total=0
         | 
| 64 | 
            +
                            sum=es.inject(0){|a,h|
         | 
| 65 | 
            +
                                n_total+=h['N']
         | 
| 66 | 
            +
                                a+((h['N']**2 * h['s']**2) / h['n'].to_f)
         | 
| 67 | 
            +
                            }
         | 
| 68 | 
            +
                            (1.to_f / n_total)*Math::sqrt(sum)
         | 
| 69 | 
            +
                        end
         | 
| 70 | 
            +
                        
         | 
| 71 | 
            +
                        
         | 
| 72 | 
            +
                        def variance_ksd_wr(es)
         | 
| 73 | 
            +
                            standard_error_ksd_wr(es)**2
         | 
| 74 | 
            +
                        end
         | 
| 75 | 
            +
                        
         | 
| 76 | 
            +
                        # Source : Cochran (1972)
         | 
| 77 | 
            +
                        
         | 
| 78 | 
            +
                        def variance_ksd_wor(es)
         | 
| 79 | 
            +
                            n_total=es.inject(0) {|a,h|
         | 
| 80 | 
            +
                                a+h['N']
         | 
| 81 | 
            +
                            }    
         | 
| 82 | 
            +
                            es.inject(0){|a,h|
         | 
| 83 | 
            +
                                val=((h['N'].to_f / n_total)**2) * (h['s']**2 / h['n'].to_f) * (1 - (h['n'].to_f / h['N']))
         | 
| 84 | 
            +
                                a+val
         | 
| 85 | 
            +
                            }
         | 
| 86 | 
            +
                        end
         | 
| 87 | 
            +
                        def standard_error_ksd_wor(es)
         | 
| 88 | 
            +
                            Math::sqrt(variance_ksd_wor(es))
         | 
| 89 | 
            +
                        end
         | 
| 90 | 
            +
                        
         | 
| 91 | 
            +
                        
         | 
| 92 | 
            +
                        
         | 
| 93 | 
            +
                        def variance_esd_wor(es)
         | 
| 94 | 
            +
                            n_total=es.inject(0) {|a,h|
         | 
| 95 | 
            +
                                a+h['N']
         | 
| 96 | 
            +
                            }
         | 
| 97 | 
            +
                            
         | 
| 98 | 
            +
                            sum=es.inject(0){|a,h|
         | 
| 99 | 
            +
                                val=h['N']*(h['N']-h['n'])*(h['s']**2 / h['n'].to_f)
         | 
| 100 | 
            +
                                a+val
         | 
| 101 | 
            +
                            }
         | 
| 102 | 
            +
                            (1.0/(n_total**2))*sum
         | 
| 103 | 
            +
                        end
         | 
| 104 | 
            +
                        
         | 
| 105 | 
            +
                        
         | 
| 106 | 
            +
                        def standard_error_esd_wor(es)
         | 
| 107 | 
            +
                            Math::sqrt(variance_ksd_wor(es))
         | 
| 108 | 
            +
                        end
         | 
| 109 | 
            +
                        # Based on http://stattrek.com/Lesson6/STRAnalysis.aspx
         | 
| 110 | 
            +
                        def variance_esd_wr(es)
         | 
| 111 | 
            +
                            n_total=es.inject(0) {|a,h|
         | 
| 112 | 
            +
                                a+h['N']
         | 
| 113 | 
            +
                            }
         | 
| 114 | 
            +
                            
         | 
| 115 | 
            +
                            sum=es.inject(0){|a,h|
         | 
| 116 | 
            +
                                val= ((h['s']**2 * h['N']**2) / h['n'].to_f)
         | 
| 117 | 
            +
                                a+val
         | 
| 118 | 
            +
                            }
         | 
| 119 | 
            +
                            (1.0/(n_total**2))*sum
         | 
| 120 | 
            +
                        end
         | 
| 121 | 
            +
                        def standard_error_esd_wr(es)
         | 
| 122 | 
            +
                            Math::sqrt(variance_esd_wr(es))
         | 
| 123 | 
            +
                        end
         | 
| 124 | 
            +
                        
         | 
| 125 | 
            +
                        def proportion_variance_ksd_wor(es)
         | 
| 126 | 
            +
                            n_total=es.inject(0) {|a,h|
         | 
| 127 | 
            +
                                a+h['N']
         | 
| 128 | 
            +
                            }
         | 
| 129 | 
            +
                            
         | 
| 130 | 
            +
                            es.inject(0){|a,h|
         | 
| 131 | 
            +
                                val= (((h['N'].to_f / n_total)**2 * h['p']*(1-h['p'])) / (h['n'])) * (1- (h['n'].to_f / h['N']))
         | 
| 132 | 
            +
                                a+val
         | 
| 133 | 
            +
                            }
         | 
| 134 | 
            +
                        end
         | 
| 135 | 
            +
                        def proportion_sd_ksd_wor(es)
         | 
| 136 | 
            +
                            Math::sqrt(proportion_variance_ksd_wor(es))
         | 
| 137 | 
            +
                        end
         | 
| 138 | 
            +
                        
         | 
| 139 | 
            +
                        
         | 
| 140 | 
            +
                        # Standard deviation of a stratified proportion, sampling with
                        # replacement (no finite population correction is applied).
                        #
                        # es:: enumerable of per-stratum hashes with keys
                        #      'N' (stratum size), 'n' (stratum sample size) and 'p' (proportion).
                        #
                        # Returns sqrt(sum of N**2 * p*(1-p)/n) divided by the total of all 'N'.
                        def proportion_sd_ksd_wr(es)
                            population = es.inject(0) { |total, stratum| total + stratum['N'] }

                            weighted = es.inject(0) do |acc, stratum|
                                numerator = stratum['N']**2 * stratum['p'] * (1 - stratum['p'])
                                acc + numerator / stratum['n'].to_f
                            end

                            Math.sqrt(weighted) * (1.0 / population)
                        end
         | 
| 151 | 
            +
                        # Variance of a stratified proportion, sampling with replacement.
                        #
                        # es:: enumerable of per-stratum hashes with keys
                        #      'N' (stratum size), 'n' (stratum sample size) and 'p' (proportion).
                        #
                        # Returns (sum of N**2 * p*(1-p)/n) / (total N)**2, i.e. the square of
                        # proportion_sd_ksd_wr. The previous implementation squared the
                        # *without replacement* variance, which is neither quantity.
                        def proportion_variance_ksd_wr(es)
                            population = es.inject(0) { |total, stratum| total + stratum['N'] }

                            weighted = es.inject(0) do |acc, stratum|
                                acc + (stratum['N']**2 * stratum['p'] * (1 - stratum['p'])) / stratum['n'].to_f
                            end

                            # Computed directly instead of squaring a square root, to avoid
                            # needless rounding.
                            weighted / population.to_f**2
                        end
         | 
| 154 | 
            +
                        
         | 
| 155 | 
            +
                        # Estimated variance of a stratified proportion, sampling without
                        # replacement.
                        #
                        # es:: enumerable of per-stratum hashes with keys
                        #      'N' (stratum size), 'n' (stratum sample size) and 'p' (proportion).
                        #
                        # Returns (1 / N_total**2) * sum of
                        #   N**2 * (N - n) * p * (1 - p) / ((n - 1) * (N - 1))
                        # which is the same expression used by the instance method variance_pst.
                        #
                        # Fixes: the accumulator was overwritten and an undefined local was added
                        # to it (NameError on any non-empty input), and a square root was applied
                        # to the sum although the result is a variance.
                        def proportion_variance_esd_wor(es)
                            population = es.inject(0) { |total, stratum| total + stratum['N'] }

                            weighted = es.inject(0) do |acc, stratum|
                                stratum_size = stratum['N']
                                sample = stratum['n']
                                prop = stratum['p']
                                term = (stratum_size**2 * (stratum_size - sample) * prop * (1.0 - prop)) /
                                       ((sample - 1) * (stratum_size - 1))
                                acc + term
                            end

                            weighted * (1.0 / population**2)
                        end

                        # Estimated standard deviation of a stratified proportion, sampling
                        # without replacement: square root of proportion_variance_esd_wor.
                        # (Previously this took the root of the ksd variance.)
                        #
                        # es:: enumerable of per-stratum hashes with keys 'N', 'n' and 'p'.
                        def proportion_sd_esd_wor(es)
                            Math.sqrt(proportion_variance_esd_wor(es))
                        end
         | 
| 169 | 
            +
                        
         | 
| 170 | 
            +
                        
         | 
| 171 | 
            +
                        
         | 
| 172 | 
            +
            		end
         | 
| 173 | 
            +
                    # Builds a stratified sample from a multiset and the size of each stratum.
                    #
                    # ms::           a Statsample::Multiset; anything else raises TypeError.
                    # strata_sizes:: hash of stratum name => stratum size. Its sorted keys must
                    #                equal ms.datasets_names, otherwise ArgumentError is raised.
                    #
                    # Stores the population size (sum of the strata sizes), the number of strata
                    # (ms.n_datasets) and the sample size (sum of the cases of every dataset).
                    def initialize(ms,strata_sizes)
                        unless ms.is_a? Statsample::Multiset
                            raise TypeError,"ms should be a Multiset"
                        end
                        @ms=ms
                        if strata_sizes.keys.sort != ms.datasets_names
                            raise ArgumentError,"You should put a strata size for each dataset"
                        end
                        @strata_sizes=strata_sizes
                        @population_size=@strata_sizes.inject(0) { |total,(_name,size)| total+size }
                        @strata_number=@ms.n_datasets
                        @sample_size=@ms.datasets.inject(0) { |total,(_name,dataset)| total+dataset.cases }
                    end
         | 
| 182 | 
            +
                    # Number of strata, as reported by the multiset's n_datasets when the
                    # sample was built.
                    def strata_number
                        @strata_number
                    end
         | 
| 186 | 
            +
                    # Population size: the sum of all strata sizes given at construction.
                    # Symbol: N
                    def population_size
                        @population_size
                    end
         | 
| 191 | 
            +
                    # Sample size: the sum of the number of cases of every stratum's dataset.
                    def sample_size
                        @sample_size
                    end
         | 
| 195 | 
            +
                    # Size of stratum +h+, looked up in the strata sizes given at construction.
                    # Returns whatever that lookup yields for an unknown name.
                    def stratum_size(h)
                        @strata_sizes[h]
                    end
         | 
| 199 | 
            +
                    # Returns an array with the value of +field+ taken from every dataset
                    # of the multiset, in the order the datasets are iterated.
                    def vectors_by_field(field)
                        @ms.datasets.map { |_name, dataset| dataset[field] }
                    end
         | 
| 204 | 
            +
                    # Population proportion of value +v+ for +field+, based on strata:
                    # each stratum's proportion weighted by its ponderation and passed to
                    # the multiset's sum_field.
                    #
                    # field:: name of the field to inspect in every stratum.
                    # v::     value whose proportion is requested (default 1).
                    def proportion(field, v=1)
                        @ms.sum_field(field) do |stratum, vector|
                            weight = stratum_ponderation(stratum)
                            weight * vector.proportion(v)
                        end
                    end
         | 
| 210 | 
            +
                    # Stratum ponderation (weight): size of stratum +h+ divided by the
                    # population size, as a Float.
                    # Symbol: W<sub>h</sub>
                    def stratum_ponderation(h)
                        stratum = @strata_sizes[h]
                        stratum.to_f / @population_size
                    end
         | 
| 215 | 
            +
                    alias_method :wh, :stratum_ponderation
         | 
| 216 | 
            +
                    
         | 
| 217 | 
            +
                    # Population mean of +field+ based on strata: each stratum's mean
                    # weighted by its ponderation and passed to the multiset's sum_field.
                    def mean(field)
                        @ms.sum_field(field) do |stratum, vector|
                            weight = stratum_ponderation(stratum)
                            weight * vector.mean
                        end
                    end
         | 
| 223 | 
            +
                    # Standard error with estimated population variance and without
                    # replacement. Source: Cochran (1972)
                    #
                    # Collects, for every stratum, its size ('N'), the vector size ('n') and
                    # the vector's sds ('s'), and delegates the computation to
                    # StratifiedSample.standard_error_esd_wor.
                    def standard_error_wor(field)
                        strata = @ms.collect_vector(field) do |name, vector|
                            {'N'=>@strata_sizes[name],'n'=>vector.size, 's'=>vector.sds}
                        end

                        StratifiedSample.standard_error_esd_wor(strata)
                    end
         | 
| 232 | 
            +
             | 
| 233 | 
            +
                    # Standard error with estimated population variance and without
                    # replacement, computed inline.
                    # Source: http://stattrek.com/Lesson6/STRAnalysis.aspx
                    #
                    # Returns (1/N) * sqrt(sum of N_h**2 * (1 - n_h/N_h) * s_h**2 / n_h),
                    # where s_h**2 is the vector's variance_sample.
                    def standard_error_wor_2(field)
                        total = @ms.sum_field(field) do |name, vector|
                            stratum = @strata_sizes[name]
                            n_h = vector.size.to_f
                            # Finite population correction for this stratum.
                            fpc = 1 - (n_h / stratum)
                            stratum**2 * fpc * vector.variance_sample / n_h
                        end

                        (1/@population_size.to_f)*Math.sqrt(total)
                    end
         | 
| 243 | 
            +
                    
         | 
| 244 | 
            +
                    # Standard error for +field+, delegated to
                    # StratifiedSample.standard_error_esd_wr ("wr" presumably means with
                    # replacement -- confirm against that method).
                    #
                    # Passes one hash per stratum with its size ('N'), the vector size ('n')
                    # and the vector's sds ('s').
                    def standard_error_wr(field)
                        strata = @ms.collect_vector(field) do |name, vector|
                            {'N'=>@strata_sizes[name],'n'=>vector.size, 's'=>vector.sds}
                        end

                        StratifiedSample.standard_error_esd_wr(strata)
                    end
         | 
| 251 | 
            +
                    # Standard deviation of the proportion of value +v+ in +field+,
                    # delegated to StratifiedSample.proportion_sd_esd_wor.
                    #
                    # Passes one hash per stratum with its size ('N'), the vector size ('n')
                    # and the vector's proportion of +v+ ('p').
                    #
                    # field:: name of the field to inspect in every stratum.
                    # v::     value whose proportion is requested (default 1).
                    def proportion_sd_esd_wor(field,v=1)
                        strata = @ms.collect_vector(field) do |name, vector|
                            {'N'=>@strata_sizes[name],'n'=>vector.size, 'p'=>vector.proportion(v)}
                        end

                        StratifiedSample.proportion_sd_esd_wor(strata)
                    end
         | 
| 258 | 
            +
                    
         | 
| 259 | 
            +
                    # Standard error of the proportion of value +v+ in +field+.
                    #
                    # Returns (1/N) * sqrt(sum of N_h**2 * (1 - n_h/N_h) * p*(1-p) / (n_h - 1)),
                    # where p is the overall stratified proportion returned by #proportion.
                    #
                    # field:: name of the field to inspect in every stratum.
                    # v::     value whose proportion is requested (default 1).
                    #
                    # NOTE(review): the same pooled proportion is used for every stratum; the
                    # Stat Trek formula this resembles uses each stratum's own proportion --
                    # confirm which one is intended.
                    def proportion_standard_error(field,v=1)
                        prop=proportion(field,v)
                        sum=@ms.sum_field(field) do |s_name,vector|
                            # Float on purpose: with Integer sizes nh/s_size truncated to 0 and
                            # the finite population correction silently disappeared.
                            nh=vector.size.to_f
                            s_size=@strata_sizes[s_name]
                            s_size**2 * (1-(nh/s_size)) * prop * (1-prop) / (nh-1)
                        end
                        (1/@population_size.to_f) * Math.sqrt(sum)
                    end
         | 
| 268 | 
            +
                    # Variance of the stratified proportion of value +v+ in +field+.
                    # Source: Cochran(1971), p. 150
                    #
                    # Returns (1/N**2) * sum over strata of
                    #   (N_h**2 * (N_h - n_h) / (N_h - 1)) * (p_h * (1 - p_h) / (n_h - 1))
                    # where n_h is the number of cases of the stratum's dataset and p_h the
                    # proportion of +v+ in that dataset's field.
                    def variance_pst(field,v=1)
                        total = @ms.datasets.inject(0) do |acc, (stratum_name, dataset)|
                            nh = dataset.cases.to_f
                            s_size = @strata_sizes[stratum_name]
                            prop = dataset[field].proportion(v)
                            size_factor = (s_size**2 * (s_size-nh)) / (s_size-1)
                            acc + size_factor * (prop*(1-prop) / (nh-1))
                        end

                        (1/@population_size.to_f ** 2)*total
                    end
         | 
| 280 | 
            +
                end
         | 
| 281 | 
            +
            end
         |