RubyGems - statsample - Versions diffs - 0.7.0 → 0.8.0 - Mend

statsample 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62)

data/History.txt +7 -0
data/Manifest.txt +15 -9
data/README.txt +6 -0
data/Rakefile +8 -0
data/{demo → examples}/correlation_matrix.rb +0 -0
data/{demo/dominanceanalysis.rb → examples/dominance_analysis.rb} +0 -0
data/{demo → examples}/dominance_analysis_bootstrap.rb +0 -0
data/{demo → examples}/levene.rb +0 -0
data/{demo → examples}/multiple_regression.rb +5 -3
data/{demo → examples}/multivariate_correlation.rb +0 -0
data/{demo → examples}/polychoric.rb +0 -0
data/{demo → examples}/principal_axis.rb +0 -0
data/examples/t_test.rb +11 -0
data/{demo → examples}/tetrachoric.rb +0 -0
data/lib/statistics2.rb +1 -1
data/lib/statsample.rb +57 -6
data/lib/statsample/bivariate/polychoric.rb +12 -25
data/lib/statsample/bivariate/tetrachoric.rb +1 -3
data/lib/statsample/converter/csv.rb +11 -12
data/lib/statsample/dominanceanalysis/bootstrap.rb +2 -3
data/lib/statsample/factor/principalaxis.rb +0 -2
data/lib/statsample/factor/rotation.rb +6 -8
data/lib/statsample/graph.rb +8 -0
data/lib/statsample/graph/svggraph.rb +0 -4
data/lib/statsample/regression/multiple/baseengine.rb +25 -28
data/lib/statsample/regression/multiple/matrixengine.rb +30 -34
data/lib/statsample/test.rb +36 -1
data/lib/statsample/test/levene.rb +11 -7
data/lib/statsample/test/t.rb +189 -0
data/test/test_anova.rb +8 -10
data/test/test_bivariate.rb +40 -37
data/test/test_codification.rb +9 -13
data/test/test_combination.rb +37 -39
data/test/test_crosstab.rb +46 -48
data/test/test_csv.rb +40 -45
data/test/test_dataset.rb +150 -152
data/test/test_distribution.rb +24 -21
data/test/test_dominance_analysis.rb +10 -12
data/test/test_factor.rb +95 -91
data/test/test_ggobi.rb +30 -33
data/test/test_gsl.rb +4 -4
data/test/test_helpers.rb +26 -0
data/test/test_histogram.rb +5 -6
data/test/test_logit.rb +20 -21
data/test/test_matrix.rb +47 -48
data/test/test_mle.rb +130 -131
data/test/test_multiset.rb +95 -96
data/test/test_permutation.rb +35 -36
data/test/test_promise_after.rb +39 -0
data/test/test_regression.rb +49 -51
data/test/test_reliability.rb +29 -30
data/test/test_resample.rb +22 -23
data/test/test_srs.rb +8 -9
data/test/test_statistics.rb +12 -6
data/test/test_stest.rb +18 -10
data/test/test_stratified.rb +15 -16
data/test/test_svg_graph.rb +11 -22
data/test/test_test_t.rb +40 -0
data/test/test_umannwhitney.rb +14 -15
data/test/test_vector.rb +33 -37
data/test/test_xls.rb +34 -41
metadata +22 -11

data/test/test_mle.rb CHANGED Viewed

@@ -1,140 +1,139 @@
-$:.unshift(File.dirname(__FILE__)+'/../lib/')
-require 'statsample'
-require 'test/unit'
-class StatsampleMLETestCase < Test::Unit::TestCase
-    def setup
-        @file_binomial=File.dirname(__FILE__)+'/../data/test_binomial.csv'
-        @crime=File.dirname(__FILE__)+'/../data/crime.txt'
-        @cases=100
-        a=Array.new()
-        b=Array.new()
-        c=Array.new()
-        y=Array.new()
+require(File.dirname(__FILE__)+'/test_helpers.rb')
-        @cases.times{|i|
-            a.push(2*rand()-i)
-            b.push(2*rand()-5+i)
-            c.push(2*rand()+i)
-            y_val=i+(rand()*@cases.quo(2) - @cases.quo(4))
-            y.push(y_val<(@cases/2.0) ? 0.0 : 1.0)
-        }
-        a=a.to_vector(:scale)
-        b=b.to_vector(:scale)
-        c=c.to_vector(:scale)
-        y=y.to_vector(:scale)
-        @ds_indep={'a'=>a,'b'=>b,'c'=>c}.to_dataset
-        constant=([1.0]*@cases).to_vector(:scale)
-        @ds_indep_2={'constant'=>constant,'a'=>a,'b'=>b,'c'=>c}.to_dataset
-        @ds_indep_2.fields=%w{constant a b c}
-        @mat_x=@ds_indep_2.to_matrix
-        @mat_y=y.to_matrix(:vertical)
-        @ds=@ds_indep.dup
-        @ds.add_vector('y',y)
-    end
-    def test_normal
-        y=Array.new()
-        y=@ds_indep.collect{|row|
-            row['a']*5+row['b']+row['c']+rand()*3
-        }
-        constant=([1]*@cases).to_vector(:scale)
-        ds_indep_2=@ds_indep.dup
-        ds_indep_2['constant']=constant
-        ds_indep_2.fields=%w{constant a b c}
-        mat_x=ds_indep_2.to_matrix
-        mat_y=y.to_matrix(:vertical)
-        mle=Statsample::MLE::Normal.new()
-        mle.verbose=false
-        coeffs_nr=mle.newton_raphson(mat_x,mat_y)
-        #p coeffs_nr
-        ds=@ds_indep.dup
-        ds.add_vector('y',y)
-        lr=Statsample::Regression.multiple(ds,'y')
-        lr_constant = lr.constant
-        lr_coeffs   = lr.coeffs
-        assert_in_delta(coeffs_nr[0,0], lr_constant,0.0000001)
-        assert_in_delta(coeffs_nr[1,0], lr_coeffs["a"],0.0000001)
-        assert_in_delta(coeffs_nr[2,0], lr_coeffs["b"],0.0000001)
-        assert_in_delta(coeffs_nr[3,0], lr_coeffs["c"],0.0000001)
-    end
-    def test_probit
-        ds=Statsample::CSV.read(@file_binomial)
-        constant=([1.0]*ds.cases).to_vector(:scale)
-        ds_indep={'constant'=>constant, 'a'=>ds['a'],'b'=>ds['b'], 'c'=>ds['c']}.to_dataset(%w{constant a b c})
-        mat_x=ds_indep.to_matrix
-        mat_y=ds['y'].to_matrix(:vertical)
-        mle=Statsample::MLE::Probit.new
-        b_probit=mle.newton_raphson(mat_x,mat_y)
-        ll=mle.log_likehood(mat_x,mat_y,b_probit)
-        b_exp=[-3.0670,0.1763,0.4483,-0.2240]
-        b_exp.each_index{|i|
-            assert_in_delta(b_exp[i], b_probit[i,0], 0.001)
-        }
-        assert_in_delta(-38.31559,ll,0.0001)
-    end
-    def test_logit_crime
-        ds=Statsample::PlainText.read(@crime, %w{crimerat maleteen south educ police60 police59 labor  males pop nonwhite unemp1  unemp2 median belowmed})
-        constant=([1.0]*ds.cases).to_vector(:scale)
-        ds2=ds.dup(%w{maleteen south educ police59})
-        ds2['constant']=constant
-        ds2.fields=%w{constant maleteen south educ police59}
-        mat_x=ds2.to_matrix
-        mat_y=(ds.compute "(crimerat>=110) ? 1:0").to_matrix(:vertical)
-        mle=Statsample::MLE::Logit.new
-        b=mle.newton_raphson(mat_x,mat_y)
-        ll=mle.log_likehood(mat_x,mat_y,b)
-        assert_in_delta(-18.606959,ll,0.001)
-        exp=[-17.701, 0.0833,-1.117, 0.0229, 0.0581]
-        exp.each_index{|i|
-            assert_in_delta(exp[i],b[i,0],0.001)
-        }
-        assert_equal(5,mle.iterations)
-    end
-    def atest_logit_alglib
-        if(HAS_ALGIB)
-            ds=Statsample::CSV.read(@file_binomial)
-            constant=([1.0]*ds.cases).to_vector(:scale)
-            ds_indep={'constant'=>constant, 'a'=>ds['a'],'b'=>ds['b'], 'c'=>ds['c']}.to_dataset(%w{constant a b c} )
-            mat_x=ds_indep.to_matrix
-            mat_y=ds['y'].to_matrix(:vertical)
-            log=Alglib::Logit.build_from_matrix(ds.to_matrix)
-            coeffs=log.unpack[0]
-            b_alglib=Matrix.columns([[-coeffs[3], -coeffs[0], -coeffs[1], -coeffs[2]]])
-            mle=Statsample::MLE::Logit.new
-            ll_alglib=mle.log_likehood(mat_x,mat_y,b_alglib)
-            b_newton=mle.newton_raphson(mat_x,mat_y)
-            ll_pure_ruby=mle.log_likehood(mat_x,mat_y,b_newton)
-            #p b_alglib
-            #p b_newton
-            assert_in_delta(ll_alglib,ll_pure_ruby,1)
-        end
+class StatsampleMLETestCase < MiniTest::Unit::TestCase
+  def setup
+    @file_binomial=File.dirname(__FILE__)+'/../data/test_binomial.csv'
+    @crime=File.dirname(__FILE__)+'/../data/crime.txt'
+    @cases=100
+    a=Array.new()
+    b=Array.new()
+    c=Array.new()
+    y=Array.new()
-    end
-    def atest_logit1
-        log=Alglib::Logit.build_from_matrix(@ds.to_matrix)
-        coeffs=log.unpack[0]
-        b=Matrix.columns([[-coeffs[3],-coeffs[0],-coeffs[1],-coeffs[2]]])
-#        puts "Coeficientes beta alglib:"
-        #p b
-        mle_alglib=Statsample::MLE::ln_mle(Statsample::MLE::Logit, @mat_x,@mat_y,b)
-#       puts "MLE Alglib:"
-        #p mle_alglib
-#        Statsample::CSV.write(ds,"test_binomial.csv")
+    @cases.times{|i|
+      a.push(2*rand()-i)
+      b.push(2*rand()-5+i)
+      c.push(2*rand()+i)
+      y_val=i+(rand()*@cases.quo(2) - @cases.quo(4))
+      y.push(y_val<(@cases/2.0) ? 0.0 : 1.0)
+    }
+    a=a.to_vector(:scale)
+    b=b.to_vector(:scale)
+    c=c.to_vector(:scale)
+    y=y.to_vector(:scale)
+    @ds_indep={'a'=>a,'b'=>b,'c'=>c}.to_dataset
+    constant=([1.0]*@cases).to_vector(:scale)
+    @ds_indep_2={'constant'=>constant,'a'=>a,'b'=>b,'c'=>c}.to_dataset
+    @ds_indep_2.fields=%w{constant a b c}
+    @mat_x=@ds_indep_2.to_matrix
+    @mat_y=y.to_matrix(:vertical)
+    @ds=@ds_indep.dup
+    @ds.add_vector('y',y)
+  end
+  def test_normal
+    y=Array.new()
+    y=@ds_indep.collect{|row|
+      row['a']*5+row['b']+row['c']+rand()*3
+    }
+    constant=([1]*@cases).to_vector(:scale)
+    ds_indep_2=@ds_indep.dup
+    ds_indep_2['constant']=constant
+    ds_indep_2.fields=%w{constant a b c}
+    mat_x=ds_indep_2.to_matrix
+    mat_y=y.to_matrix(:vertical)
+    mle=Statsample::MLE::Normal.new()
+    mle.verbose=false
+    coeffs_nr=mle.newton_raphson(mat_x,mat_y)
+    #p coeffs_nr
+    ds=@ds_indep.dup
+    ds.add_vector('y',y)
+    lr=Statsample::Regression.multiple(ds,'y')
+    lr_constant = lr.constant
+    lr_coeffs   = lr.coeffs
+    assert_in_delta(coeffs_nr[0,0], lr_constant,0.0000001)
+    assert_in_delta(coeffs_nr[1,0], lr_coeffs["a"],0.0000001)
+    assert_in_delta(coeffs_nr[2,0], lr_coeffs["b"],0.0000001)
+    assert_in_delta(coeffs_nr[3,0], lr_coeffs["c"],0.0000001)
+  end
+  def test_probit
+    ds=Statsample::CSV.read(@file_binomial)
+    constant=([1.0]*ds.cases).to_vector(:scale)
+    ds_indep={'constant'=>constant, 'a'=>ds['a'],'b'=>ds['b'], 'c'=>ds['c']}.to_dataset(%w{constant a b c})
+    mat_x=ds_indep.to_matrix
+    mat_y=ds['y'].to_matrix(:vertical)
+    mle=Statsample::MLE::Probit.new
+    b_probit=mle.newton_raphson(mat_x,mat_y)
+    ll=mle.log_likehood(mat_x,mat_y,b_probit)
+    b_exp=[-3.0670,0.1763,0.4483,-0.2240]
+    b_exp.each_index{|i|
+      assert_in_delta(b_exp[i], b_probit[i,0], 0.001)
+    }
+    assert_in_delta(-38.31559,ll,0.0001)
+  end
+  def test_logit_crime
+    ds=Statsample::PlainText.read(@crime, %w{crimerat maleteen south educ police60 police59 labor  males pop nonwhite unemp1  unemp2 median belowmed})
+    constant=([1.0]*ds.cases).to_vector(:scale)
+    ds2=ds.dup(%w{maleteen south educ police59})
+    ds2['constant']=constant
+    ds2.fields=%w{constant maleteen south educ police59}
+    mat_x=ds2.to_matrix
+    mat_y=(ds.compute "(crimerat>=110) ? 1:0").to_matrix(:vertical)
+    mle=Statsample::MLE::Logit.new
+    b=mle.newton_raphson(mat_x,mat_y)
+    ll=mle.log_likehood(mat_x,mat_y,b)
+    assert_in_delta(-18.606959,ll,0.001)
+    exp=[-17.701, 0.0833,-1.117, 0.0229, 0.0581]
+    exp.each_index{|i|
+      assert_in_delta(exp[i],b[i,0],0.001)
+    }
+    assert_equal(5,mle.iterations)
+  end
+  def atest_logit_alglib
+    if(HAS_ALGIB)
+      ds=Statsample::CSV.read(@file_binomial)
+      constant=([1.0]*ds.cases).to_vector(:scale)
-#        puts "iniciando newton"
-        coeffs_nr=Statsample::MLE.newton_raphson(@mat_x,@mat_y, Statsample::MLE::Logit)
-        #p coeffs_nr
-        mle_pure_ruby=Statsample::MLE::ln_mle(Statsample::MLE::Logit, @mat_x,@mat_y,coeffs_nr)
-        #p mle_pure_ruby
+      ds_indep={'constant'=>constant, 'a'=>ds['a'],'b'=>ds['b'], 'c'=>ds['c']}.to_dataset(%w{constant a b c} )
-        #puts "Malo: #{mle_malo} Bueno: #{mle_bueno} : #{mle_malo-mle_bueno}"
+      mat_x=ds_indep.to_matrix
+      mat_y=ds['y'].to_matrix(:vertical)
+      log=Alglib::Logit.build_from_matrix(ds.to_matrix)
+      coeffs=log.unpack[0]
+      b_alglib=Matrix.columns([[-coeffs[3], -coeffs[0], -coeffs[1], -coeffs[2]]])
+      mle=Statsample::MLE::Logit.new
+      ll_alglib=mle.log_likehood(mat_x,mat_y,b_alglib)
+      b_newton=mle.newton_raphson(mat_x,mat_y)
+      ll_pure_ruby=mle.log_likehood(mat_x,mat_y,b_newton)
+      #p b_alglib
+      #p b_newton
+      assert_in_delta(ll_alglib,ll_pure_ruby,1)
     end
+  end
+  def atest_logit1
+    log=Alglib::Logit.build_from_matrix(@ds.to_matrix)
+    coeffs=log.unpack[0]
+    b=Matrix.columns([[-coeffs[3],-coeffs[0],-coeffs[1],-coeffs[2]]])
+    #        puts "Coeficientes beta alglib:"
+    #p b
+    mle_alglib=Statsample::MLE::ln_mle(Statsample::MLE::Logit, @mat_x,@mat_y,b)
+    #       puts "MLE Alglib:"
+    #p mle_alglib
+    #        Statsample::CSV.write(ds,"test_binomial.csv")
+    #        puts "iniciando newton"
+    coeffs_nr=Statsample::MLE.newton_raphson(@mat_x,@mat_y, Statsample::MLE::Logit)
+    #p coeffs_nr
+    mle_pure_ruby=Statsample::MLE::ln_mle(Statsample::MLE::Logit, @mat_x,@mat_y,coeffs_nr)
+    #p mle_pure_ruby
+    #puts "Malo: #{mle_malo} Bueno: #{mle_bueno} : #{mle_malo-mle_bueno}"
+  end
 end

data/test/test_multiset.rb CHANGED Viewed

@@ -1,98 +1,97 @@
-$:.unshift(File.dirname(__FILE__)+'/../lib/')
-require 'statsample'
-require 'test/unit'
+require(File.dirname(__FILE__)+'/test_helpers.rb')
-class StatsampleMultisetTestCase < Test::Unit::TestCase
-	def initialize(*args)
-		super
-	end
-    def test_creation
-        v1a=[1,2,3,4,5].to_vector
-        v2b=[11,21,31,41,51].to_vector
-        v3c=[21,23,34,45,56].to_vector
-        ds1={'v1'=>v1a,'v2'=>v2b,'v3'=>v3c}.to_dataset
-        v1b=[15,25,35,45,55].to_vector
-        v2b=[11,21,31,41,51].to_vector
-        v3b=[21,23,34,45,56].to_vector
-        ds2={'v1'=>v1b,'v2'=>v2b,'v3'=>v3b}.to_dataset
-        ms=Statsample::Multiset.new(['v1','v2','v3'])
-        ms.add_dataset('ds1',ds1)
-        ms.add_dataset('ds2',ds2)
-        assert_equal(ds1,ms['ds1'])
-        assert_equal(ds2,ms['ds2'])
-        assert_equal(v1a,ms['ds1']['v1'])
-        assert_not_equal(v1b,ms['ds1']['v1'])
-        ds3={'v1'=>v1b,'v2'=>v2b}.to_dataset
-        assert_raise ArgumentError do
-			ms.add_dataset(ds3)
-		end
-    end
-    def test_creation_empty
-        ms=Statsample::Multiset.new_empty_vectors(%w{id age name},%w{male female})
-        ds_male={'id'=>[].to_vector,'age'=>[].to_vector, 'name'=>[].to_vector}.to_dataset(%w{id age name})
-        ds_female={'id'=>[].to_vector,'age'=>[].to_vector, 'name'=>[].to_vector}.to_dataset(%w{id age name})
-        ms2=Statsample::Multiset.new(%w{id age name})
-        ms2.add_dataset('male',ds_male)
-        ms2.add_dataset('female',ds_female)
-        assert_equal(ms2.fields,ms.fields)
-        assert_equal(ms2['male'],ms['male'])
-        assert_equal(ms2['female'],ms['female'])
-    end
-    def test_to_multiset_by_split_one
-        sex=%w{m m m m m f f f f m}.to_vector(:nominal)
-        city=%w{London Paris NY London Paris NY London Paris NY Tome}.to_vector(:nominal)
-        age=[10,10,20,30,34,34,33,35,36,40].to_vector(:scale)
-        ds={'sex'=>sex,'city'=>city,'age'=>age}.to_dataset
-        ms=ds.to_multiset_by_split('sex')
-        assert_equal(2,ms.n_datasets)
-        assert_equal(%w{f m},ms.datasets.keys.sort)
-        assert_equal(6,ms['m'].cases)
-        assert_equal(4,ms['f'].cases)
-        assert_equal(%w{London Paris NY London Paris Tome},ms['m']['city'].to_a)
-        assert_equal([34,33,35,36],ms['f']['age'].to_a)
-    end
-    def test_to_multiset_by_split_multiple
-        sex=%w{m m m m m m m m m m f f f f f f f f f f}.to_vector(:nominal)
-        city=%w{London London London Paris Paris London London London Paris Paris London London London Paris Paris London London London Paris Paris}.to_vector(:nominal)
-		hair=%w{blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black}.to_vector(:nominal)
-        age=[10,10,20,30,34,34,33,35,36,40, 10,10,20,30,34,34,33,35,36,40].to_vector(:scale)
-        ds={'sex'=>sex,'city'=>city,'hair'=>hair,'age'=>age}.to_dataset(%w{sex city hair age})
-        ms=ds.to_multiset_by_split('sex','city','hair')
-        assert_equal(8,ms.n_datasets)
-		assert_equal(3,ms[%w{m London blonde}].cases)
-		assert_equal(3,ms[%w{m London blonde}].cases)
-		assert_equal(1,ms[%w{m Paris black}].cases)
-    end
-    def test_stratum_proportion
-        ds1={'q1'=>[1,1,1,1,1,0,0,0,0,0,0,0].to_vector}.to_dataset
-        ds2={'q1'=>[1,1,1,1,1,1,1,0,0].to_vector}.to_dataset
-        assert_equal(5.0/12, ds1['q1'].proportion )
-        assert_equal(7.0/9, ds2['q1'].proportion )
-        ms=Statsample::Multiset.new(['q1'])
-        ms.add_dataset('d1',ds1)
-        ms.add_dataset('d2',ds2)
-        ss=Statsample::StratifiedSample.new(ms,{'d1'=>50,'d2'=>100})
-        assert_in_delta(0.655, ss.proportion('q1'),0.01)
-        assert_in_delta(0.345, ss.proportion('q1',0),0.01)
-    end
-    def test_stratum_scale
-        boys={'test'=>[50, 55, 60, 62, 62, 65, 67, 67, 70, 70, 73, 73, 75, 78, 78, 80, 85, 90].to_vector(:scale)}.to_dataset
-        girls={'test'=>[70, 70, 72, 72, 75, 75, 78, 78, 80, 80, 82, 82, 85, 85, 88, 88, 90, 90].to_vector(:scale)}.to_dataset
-        ms=Statsample::Multiset.new(['test'])
-        ms.add_dataset('boys',boys)
-        ms.add_dataset('girls',girls)
-        ss=Statsample::StratifiedSample.new(ms,{'boys'=>10000,'girls'=>10000})
-        assert_equal(2,ss.strata_number)
-        assert_equal(20000,ss.population_size)
-        assert_equal(10000,ss.stratum_size('boys'))
-        assert_equal(10000,ss.stratum_size('girls'))
-        assert_equal(36,ss.sample_size)
-        assert_equal(75,ss.mean('test'))
-        assert_in_delta(1.45,ss.standard_error_wor('test'),0.01)
-        assert_in_delta(ss.standard_error_wor('test'), ss.standard_error_wor_2('test'),0.00001)
+class StatsampleMultisetTestCase < MiniTest::Unit::TestCase
+  def initialize(*args)
+    super
+  end
+  def test_creation
+    v1a=[1,2,3,4,5].to_vector
+    v2b=[11,21,31,41,51].to_vector
+    v3c=[21,23,34,45,56].to_vector
+    ds1={'v1'=>v1a,'v2'=>v2b,'v3'=>v3c}.to_dataset
+    v1b=[15,25,35,45,55].to_vector
+    v2b=[11,21,31,41,51].to_vector
+    v3b=[21,23,34,45,56].to_vector
+    ds2={'v1'=>v1b,'v2'=>v2b,'v3'=>v3b}.to_dataset
+    ms=Statsample::Multiset.new(['v1','v2','v3'])
+    ms.add_dataset('ds1',ds1)
+    ms.add_dataset('ds2',ds2)
+    assert_equal(ds1,ms['ds1'])
+    assert_equal(ds2,ms['ds2'])
+    assert_equal(v1a,ms['ds1']['v1'])
+    assert_not_equal(v1b,ms['ds1']['v1'])
+    ds3={'v1'=>v1b,'v2'=>v2b}.to_dataset
+    assert_raise ArgumentError do
+      ms.add_dataset(ds3)
     end
-end
+  end
+  def test_creation_empty
+    ms=Statsample::Multiset.new_empty_vectors(%w{id age name},%w{male female})
+    ds_male={'id'=>[].to_vector,'age'=>[].to_vector, 'name'=>[].to_vector}.to_dataset(%w{id age name})
+    ds_female={'id'=>[].to_vector,'age'=>[].to_vector, 'name'=>[].to_vector}.to_dataset(%w{id age name})
+    ms2=Statsample::Multiset.new(%w{id age name})
+    ms2.add_dataset('male',ds_male)
+    ms2.add_dataset('female',ds_female)
+    assert_equal(ms2.fields,ms.fields)
+    assert_equal(ms2['male'],ms['male'])
+    assert_equal(ms2['female'],ms['female'])
+  end
+  def test_to_multiset_by_split_one
+    sex=%w{m m m m m f f f f m}.to_vector(:nominal)
+    city=%w{London Paris NY London Paris NY London Paris NY Tome}.to_vector(:nominal)
+    age=[10,10,20,30,34,34,33,35,36,40].to_vector(:scale)
+    ds={'sex'=>sex,'city'=>city,'age'=>age}.to_dataset
+    ms=ds.to_multiset_by_split('sex')
+    assert_equal(2,ms.n_datasets)
+    assert_equal(%w{f m},ms.datasets.keys.sort)
+    assert_equal(6,ms['m'].cases)
+    assert_equal(4,ms['f'].cases)
+    assert_equal(%w{London Paris NY London Paris Tome},ms['m']['city'].to_a)
+    assert_equal([34,33,35,36],ms['f']['age'].to_a)
+  end
+  def test_to_multiset_by_split_multiple
+    sex=%w{m m m m m m m m m m f f f f f f f f f f}.to_vector(:nominal)
+    city=%w{London London London Paris Paris London London London Paris Paris London London London Paris Paris London London London Paris Paris}.to_vector(:nominal)
+    hair=%w{blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black}.to_vector(:nominal)
+    age=[10,10,20,30,34,34,33,35,36,40, 10,10,20,30,34,34,33,35,36,40].to_vector(:scale)
+    ds={'sex'=>sex,'city'=>city,'hair'=>hair,'age'=>age}.to_dataset(%w{sex city hair age})
+    ms=ds.to_multiset_by_split('sex','city','hair')
+    assert_equal(8,ms.n_datasets)
+    assert_equal(3,ms[%w{m London blonde}].cases)
+    assert_equal(3,ms[%w{m London blonde}].cases)
+    assert_equal(1,ms[%w{m Paris black}].cases)
+  end
+  def test_stratum_proportion
+    ds1={'q1'=>[1,1,1,1,1,0,0,0,0,0,0,0].to_vector}.to_dataset
+    ds2={'q1'=>[1,1,1,1,1,1,1,0,0].to_vector}.to_dataset
+    assert_equal(5.0/12, ds1['q1'].proportion )
+    assert_equal(7.0/9, ds2['q1'].proportion )
+    ms=Statsample::Multiset.new(['q1'])
+    ms.add_dataset('d1',ds1)
+    ms.add_dataset('d2',ds2)
+    ss=Statsample::StratifiedSample.new(ms,{'d1'=>50,'d2'=>100})
+    assert_in_delta(0.655, ss.proportion('q1'),0.01)
+    assert_in_delta(0.345, ss.proportion('q1',0),0.01)
+  end
+  def test_stratum_scale
+    boys={'test'=>[50, 55, 60, 62, 62, 65, 67, 67, 70, 70, 73, 73, 75, 78, 78, 80, 85, 90].to_vector(:scale)}.to_dataset
+    girls={'test'=>[70, 70, 72, 72, 75, 75, 78, 78, 80, 80, 82, 82, 85, 85, 88, 88, 90, 90].to_vector(:scale)}.to_dataset
+    ms=Statsample::Multiset.new(['test'])
+    ms.add_dataset('boys',boys)
+    ms.add_dataset('girls',girls)
+    ss=Statsample::StratifiedSample.new(ms,{'boys'=>10000,'girls'=>10000})
+    assert_equal(2,ss.strata_number)
+    assert_equal(20000,ss.population_size)
+    assert_equal(10000,ss.stratum_size('boys'))
+    assert_equal(10000,ss.stratum_size('girls'))
+    assert_equal(36,ss.sample_size)
+    assert_equal(75,ss.mean('test'))
+    assert_in_delta(1.45,ss.standard_error_wor('test'),0.01)
+    assert_in_delta(ss.standard_error_wor('test'), ss.standard_error_wor_2('test'),0.00001)
+  end
+end