statsample 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +79 -0
- data/Manifest.txt +56 -0
- data/README.txt +77 -0
- data/Rakefile +22 -0
- data/bin/statsample +2 -0
- data/demo/benchmark.rb +52 -0
- data/demo/chi-square.rb +44 -0
- data/demo/dice.rb +13 -0
- data/demo/distribution_t.rb +95 -0
- data/demo/graph.rb +9 -0
- data/demo/item_analysis.rb +30 -0
- data/demo/mean.rb +81 -0
- data/demo/proportion.rb +57 -0
- data/demo/sample_test.csv +113 -0
- data/demo/strata_proportion.rb +152 -0
- data/demo/stratum.rb +141 -0
- data/lib/spss.rb +131 -0
- data/lib/statsample.rb +216 -0
- data/lib/statsample/anova.rb +74 -0
- data/lib/statsample/bivariate.rb +255 -0
- data/lib/statsample/chidistribution.rb +39 -0
- data/lib/statsample/codification.rb +120 -0
- data/lib/statsample/converters.rb +338 -0
- data/lib/statsample/crosstab.rb +122 -0
- data/lib/statsample/dataset.rb +526 -0
- data/lib/statsample/dominanceanalysis.rb +259 -0
- data/lib/statsample/dominanceanalysis/bootstrap.rb +126 -0
- data/lib/statsample/graph/gdchart.rb +45 -0
- data/lib/statsample/graph/svgboxplot.rb +108 -0
- data/lib/statsample/graph/svggraph.rb +181 -0
- data/lib/statsample/graph/svghistogram.rb +208 -0
- data/lib/statsample/graph/svgscatterplot.rb +111 -0
- data/lib/statsample/htmlreport.rb +232 -0
- data/lib/statsample/multiset.rb +281 -0
- data/lib/statsample/regression.rb +522 -0
- data/lib/statsample/reliability.rb +235 -0
- data/lib/statsample/resample.rb +20 -0
- data/lib/statsample/srs.rb +159 -0
- data/lib/statsample/test.rb +25 -0
- data/lib/statsample/vector.rb +759 -0
- data/test/_test_chart.rb +58 -0
- data/test/test_anova.rb +31 -0
- data/test/test_codification.rb +59 -0
- data/test/test_crosstab.rb +55 -0
- data/test/test_csv.csv +7 -0
- data/test/test_csv.rb +27 -0
- data/test/test_dataset.rb +293 -0
- data/test/test_ggobi.rb +42 -0
- data/test/test_multiset.rb +98 -0
- data/test/test_regression.rb +108 -0
- data/test/test_reliability.rb +32 -0
- data/test/test_resample.rb +23 -0
- data/test/test_srs.rb +14 -0
- data/test/test_statistics.rb +152 -0
- data/test/test_stratified.rb +19 -0
- data/test/test_svg_graph.rb +63 -0
- data/test/test_vector.rb +265 -0
- data/test/test_xls.rb +32 -0
- metadata +158 -0
    
        data/test/test_ggobi.rb
    ADDED
    
    | @@ -0,0 +1,42 @@ | |
| 1 | 
            +
            require File.dirname(__FILE__)+'/../lib/statsample'
         | 
| 2 | 
            +
            require 'statsample/multiset'
         | 
| 3 | 
            +
            require 'test/unit'
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            # Tests for the Statsample::GGobi helpers exercised below:
            # values_definition, variable_definition and out.
            class StatsampleGGobiTestCase < Test::Unit::TestCase
              # Builds the fixture before every test: a dataset with one :scale,
              # one :nominal (partially labelled) and one :ordinal vector.
              # Uses the framework's setup hook instead of overriding initialize.
              def setup
                v1 = ([10.2, 20.3, 10, 20, 30, 40, 30, 20, 30, 40] * 10).to_vector(:scale)
                @v2 = (%w{a b c a a a b b c d} * 10).to_vector(:nominal)
                @v2.labels = {"a" => "letter a", "d" => "letter d"}
                v3 = ([1, 2, 3, 4, 5, 4, 3, 2, 1, 2] * 10).to_vector(:ordinal)
                @ds = {'v1' => v1, 'v2' => @v2, 'v3' => v3}.to_dataset
              end

              # A mixed array of Float, Integer and String must be rendered as
              # <real>, <int> and <string> elements separated by single spaces.
              def test_values_definition
                a = [1.0, 2, "a"]
                assert_equal("<real>1.0</real> <int>2</int> <string>a</string>",
                             Statsample::GGobi.values_definition(a))
              end

              # variable_definition must emit the categorical variable markup and
              # record the value-to-level conversions and the variable name on the
              # carrier object passed in.
              def test_variable_definition
                # NOTE(review): OpenStruct is assumed to be loaded by statsample;
                # add require 'ostruct' to this file if that is not the case.
                carrier = OpenStruct.new
                carrier.categorials = []
                carrier.conversions = {}
                real_var_definition = Statsample::GGobi.variable_definition(carrier, @v2, 'variable 2', "v2")
                # Joined line by line (with a trailing newline) so the expected text
                # does not depend on how this source file is indented.
                expected = [
                  '<categoricalvariable name="variable 2" nickname="v2">',
                  '<levels count="4">',
                  '<level value="1">letter a</level>',
                  '<level value="2">b</level>',
                  '<level value="3">c</level>',
                  '<level value="4">letter d</level></levels>',
                  '</categoricalvariable>',
                  ''
                ].join("\n")
                # Every whitespace character is mapped to a space on both sides, so
                # newlines versus spaces in the generated markup do not matter.
                assert_equal(expected.gsub(/\s/, " "), real_var_definition.gsub(/\s/, " "))
                assert_equal({'variable 2' => {'a' => 1, 'b' => 2, 'c' => 3, 'd' => 4}}, carrier.conversions)
                assert_equal(['variable 2'], carrier.categorials)
              end

              # Smoke test: exporting the whole dataset must not raise.
              def test_out
                assert_nothing_raised do
                  Statsample::GGobi.out(@ds)
                end
              end
            end
         | 
| @@ -0,0 +1,98 @@ | |
| 1 | 
            +
            require File.dirname(__FILE__)+'/../lib/statsample.rb'
         | 
| 2 | 
            +
            require 'statsample/multiset'
         | 
| 3 | 
            +
            require 'test/unit'
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            # Tests for Statsample::Multiset (named collections of datasets that share
            # the same fields) and for Statsample::StratifiedSample built on top of it.
            class StatsampleMultisetTestCase < Test::Unit::TestCase
              # Datasets added under a name must be retrievable under that name, and
              # a dataset with a different field list must be rejected.
              def test_creation
                v1a = [1, 2, 3, 4, 5].to_vector
                v2a = [11, 21, 31, 41, 51].to_vector
                v3a = [21, 23, 34, 45, 56].to_vector
                ds1 = {'v1' => v1a, 'v2' => v2a, 'v3' => v3a}.to_dataset
                v1b = [15, 25, 35, 45, 55].to_vector
                v2b = [11, 21, 31, 41, 51].to_vector
                v3b = [21, 23, 34, 45, 56].to_vector
                ds2 = {'v1' => v1b, 'v2' => v2b, 'v3' => v3b}.to_dataset
                ms = Statsample::Multiset.new(['v1', 'v2', 'v3'])
                ms.add_dataset('ds1', ds1)
                ms.add_dataset('ds2', ds2)
                assert_equal(ds1, ms['ds1'])
                assert_equal(ds2, ms['ds2'])
                assert_equal(v1a, ms['ds1']['v1'])
                assert_not_equal(v1b, ms['ds1']['v1'])
                # ds3 lacks the 'v3' field declared for the multiset.
                ds3 = {'v1' => v1b, 'v2' => v2b}.to_dataset
                # Pass both name and dataset, as every other add_dataset call here
                # does; with a single argument the ArgumentError would come from the
                # argument count, not from the field check this test is about.
                # NOTE(review): assumes add_dataset raises ArgumentError on a field
                # mismatch - confirm against Multiset#add_dataset.
                assert_raise ArgumentError do
                  ms.add_dataset('ds3', ds3)
                end
              end

              # new_empty_vectors must be equivalent to adding empty datasets by hand.
              def test_creation_empty
                ms = Statsample::Multiset.new_empty_vectors(%w{id age name}, %w{male female})
                ds_male = {'id' => [].to_vector, 'age' => [].to_vector, 'name' => [].to_vector}.to_dataset(%w{id age name})
                ds_female = {'id' => [].to_vector, 'age' => [].to_vector, 'name' => [].to_vector}.to_dataset(%w{id age name})
                ms2 = Statsample::Multiset.new(%w{id age name})
                ms2.add_dataset('male', ds_male)
                ms2.add_dataset('female', ds_female)
                assert_equal(ms2.fields, ms.fields)
                assert_equal(ms2['male'], ms['male'])
                assert_equal(ms2['female'], ms['female'])
              end

              # Splitting by a single field yields one dataset per distinct value,
              # keeping the original case order inside each dataset.
              def test_to_multiset_by_split_one
                sex = %w{m m m m m f f f f m}.to_vector(:nominal)
                city = %w{London Paris NY London Paris NY London Paris NY Tome}.to_vector(:nominal)
                age = [10, 10, 20, 30, 34, 34, 33, 35, 36, 40].to_vector(:scale)
                ds = {'sex' => sex, 'city' => city, 'age' => age}.to_dataset
                ms = ds.to_multiset_by_split('sex')
                assert_equal(2, ms.n_datasets)
                assert_equal(%w{f m}, ms.datasets.keys.sort)
                assert_equal(6, ms['m'].cases)
                assert_equal(4, ms['f'].cases)
                assert_equal(%w{London Paris NY London Paris Tome}, ms['m']['city'].to_a)
                assert_equal([34, 33, 35, 36], ms['f']['age'].to_a)
              end

              # Splitting by several fields keys each dataset by the array of values.
              def test_to_multiset_by_split_multiple
                sex = %w{m m m m m m m m m m f f f f f f f f f f}.to_vector(:nominal)
                city = %w{London London London Paris Paris London London London Paris Paris London London London Paris Paris London London London Paris Paris}.to_vector(:nominal)
                hair = %w{blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black}.to_vector(:nominal)
                age = [10, 10, 20, 30, 34, 34, 33, 35, 36, 40, 10, 10, 20, 30, 34, 34, 33, 35, 36, 40].to_vector(:scale)
                ds = {'sex' => sex, 'city' => city, 'hair' => hair, 'age' => age}.to_dataset(%w{sex city hair age})
                ms = ds.to_multiset_by_split('sex', 'city', 'hair')
                # 2 sexes x 2 cities x 2 hair colours, all combinations present.
                assert_equal(8, ms.n_datasets)
                assert_equal(3, ms[%w{m London blonde}].cases)
                # Rows 3, 7 and 8 of the fixture are male / London / black.
                assert_equal(3, ms[%w{m London black}].cases)
                assert_equal(1, ms[%w{m Paris black}].cases)
              end

              # Stratified proportion: strata of population size 50 and 100 with
              # sample proportions 5/12 and 7/9.
              def test_stratum_proportion
                ds1 = {'q1' => [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0].to_vector}.to_dataset
                ds2 = {'q1' => [1, 1, 1, 1, 1, 1, 1, 0, 0].to_vector}.to_dataset
                assert_equal(5.0 / 12, ds1['q1'].proportion)
                assert_equal(7.0 / 9, ds2['q1'].proportion)
                ms = Statsample::Multiset.new(['q1'])
                ms.add_dataset('d1', ds1)
                ms.add_dataset('d2', ds2)
                ss = Statsample::StratifiedSample.new(ms, {'d1' => 50, 'd2' => 100})
                assert_in_delta(0.655, ss.proportion('q1'), 0.01)
                assert_in_delta(0.345, ss.proportion('q1', 0), 0.01)
              end

              # Stratified estimates for a scale variable with two equally sized strata.
              def test_stratum_scale
                boys = {'test' => [50, 55, 60, 62, 62, 65, 67, 67, 70, 70, 73, 73, 75, 78, 78, 80, 85, 90].to_vector(:scale)}.to_dataset
                girls = {'test' => [70, 70, 72, 72, 75, 75, 78, 78, 80, 80, 82, 82, 85, 85, 88, 88, 90, 90].to_vector(:scale)}.to_dataset
                ms = Statsample::Multiset.new(['test'])
                ms.add_dataset('boys', boys)
                ms.add_dataset('girls', girls)
                ss = Statsample::StratifiedSample.new(ms, {'boys' => 10000, 'girls' => 10000})
                assert_equal(2, ss.strata_number)
                assert_equal(20000, ss.population_size)
                assert_equal(10000, ss.stratum_size('boys'))
                assert_equal(10000, ss.stratum_size('girls'))
                assert_equal(36, ss.sample_size)
                assert_equal(75, ss.mean('test'))
                assert_in_delta(1.45, ss.standard_error_wor('test'), 0.01)
                # The two standard-error implementations must agree with each other.
                assert_in_delta(ss.standard_error_wor('test'), ss.standard_error_wor_2('test'), 0.00001)
              end
            end
         | 
| @@ -0,0 +1,108 @@ | |
| 1 | 
            +
            require File.dirname(__FILE__)+'/../lib/statsample'
         | 
| 2 | 
            +
            require 'test/unit'
         | 
| 3 | 
            +
            # Tests for Statsample::Regression: simple regression plus the pairwise
            # and Alglib-backed multiple regression classes.
            class StatsampleRegressionTestCase < Test::Unit::TestCase
              # Prepares the simple-regression fixture, then hands over to the
              # framework constructor.
              def initialize(*args)
                @x = [13, 20, 10, 33, 15].to_vector(:scale)
                @y = [23, 18, 35, 10, 27].to_vector(:scale)
                @reg = Statsample::Regression::SimpleRegression.new_from_vectors(@x, @y)
                super
              end

              # Intercept, slope and standard error of the simple regression.
              def test_parameters
                assert_in_delta(40.009, @reg.a, 0.001)
                assert_in_delta(-0.957, @reg.b, 0.001)
                assert_in_delta(4.248, @reg.standard_error, 0.002)
              end

              # Pairwise multiple regression on data with scattered missing values.
              def test_multiple_regression_pairwise_2
                @a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 3, nil, 3, nil, 3].to_vector(:scale)
                @b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4, 2, 2, nil, 6, 2].to_vector(:scale)
                @c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100, nil, 3, 7, nil, 7].to_vector(:scale)
                @y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 30, 40, nil, 50, nil].to_vector(:scale)
                dataset = {'a' => @a, 'b' => @b, 'c' => @c, 'y' => @y}.to_dataset
                model = Statsample::Regression::MultipleRegressionPairwise.new(dataset, 'y')
                assert_in_delta(2407.436, model.sst, 0.001)
                assert_in_delta(0.752, model.r, 0.001)
                assert_in_delta(0.565, model.r2, 0.001)
                assert_in_delta(1361.130, model.ssr, 0.001)
                assert_in_delta(1046.306, model.sse, 0.001)
                assert_in_delta(3.035, model.f, 0.001)
              end

              # Alglib-backed multiple regression; only reported, not run, when the
              # HAS_ALGIB flag is false.
              def test_multiple_regression_alglib
                unless HAS_ALGIB
                  puts "Regression::MultipleRegressionAlglib not tested (no Alglib)"
                  return
                end
                @a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7].to_vector(:scale)
                @b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4].to_vector(:scale)
                @c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100].to_vector(:scale)
                @y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30].to_vector(:scale)
                dataset = {'a' => @a, 'b' => @b, 'c' => @c, 'y' => @y}.to_dataset
                model = Statsample::Regression::MultipleRegressionAlglib.new(dataset, 'y')
                model_test(model)
                expected_predicted = [1.7857, 6.0989, 3.2433, 7.2908, 4.9667, 10.3428, 8.8158, 10.4717, 23.6639, 25.3198]
                actual_predicted = model.predicted
                expected_predicted.each_with_index do |value, idx|
                  assert_in_delta(value, actual_predicted[idx], 0.001)
                end
                expected_residuals = [1.2142, -2.0989, 1.7566, -1.29085, 2.033, -2.3428, 0.18414, -0.47177, -3.66395, 4.6801]
                actual_residuals = model.residuals
                expected_residuals.each_with_index do |value, idx|
                  assert_in_delta(value, actual_residuals[idx], 0.001)
                end
              end

              # Shared assertions for a multiple regression of 'y' on 'a', 'b' and 'c'
              # fitted to the ten complete cases used by the callers in this class.
              def model_test(lr)
                assert_in_delta(0.695, lr.coeffs['a'], 0.001)
                assert_in_delta(11.027, lr.constant, 0.001)
                assert_in_delta(1.785, lr.process([1, 3, 11]), 0.001)

                expected_std = {'a' => 0.151, 'b' => -0.547, 'c' => 0.997}
                actual_std = lr.standarized_coeffs
                expected_std.each do |name, value|
                  assert_in_delta(value, actual_std[name], 0.001)
                end
                assert_in_delta(639.6, lr.sst, 0.001)
                assert_in_delta(583.76, lr.ssr, 0.001)
                assert_in_delta(55.840, lr.sse, 0.001)
                assert_in_delta(0.955, lr.r, 0.001)
                assert_in_delta(0.913, lr.r2, 0.001)
                assert_in_delta(20.908, lr.f, 0.001)
                # The significance check is only run when HAS_GSL is true.
                if HAS_GSL
                  assert_in_delta(0.001, lr.significance, 0.001)
                else
                  puts "#{lr.class}#significance not tested (not GSL)"
                end
                assert_in_delta(0.226, lr.tolerance("a"), 0.001)
                expected_se = {"a" => 1.171, "b" => 1.129, "c" => 0.072}
                actual_se = lr.coeffs_se
                expected_se.each do |name, value|
                  assert_in_delta(value, actual_se[name], 0.001)
                end
                expected_t = {"a" => 0.594, "b" => -3.796, "c" => 3.703}
                actual_t = lr.coeffs_t
                expected_t.each do |name, value|
                  assert_in_delta(value, actual_t[name], 0.001)
                end
                assert_in_delta(4.559, lr.constant_se, 0.001)
                assert_in_delta(2.419, lr.constant_t, 0.001)
              end

              # Pairwise regression where the first case is missing on every variable;
              # the expected predicted/residual lists carry nil in that position.
              def test_regression_pairwise
                @a = [nil, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7].to_vector(:scale)
                @b = [nil, 3, 3, 4, 4, 5, 5, 6, 6, 4, 4].to_vector(:scale)
                @c = [nil, 11, 22, 30, 40, 50, 65, 78, 79, 99, 100].to_vector(:scale)
                @y = [nil, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30].to_vector(:scale)
                dataset = {'a' => @a, 'b' => @b, 'c' => @c, 'y' => @y}.to_dataset
                model = Statsample::Regression::MultipleRegressionPairwise.new(dataset, 'y')
                model_test(model)
                expected_predicted = [nil, 1.7857, 6.0989, 3.2433, 7.2908, 4.9667, 10.3428, 8.8158, 10.4717, 23.6639, 25.3198]
                actual_predicted = model.predicted
                expected_predicted.each_with_index do |value, idx|
                  assert_in_delta(value, actual_predicted[idx], 0.001)
                end
                expected_residuals = [nil, 1.2142, -2.0989, 1.7566, -1.29085, 2.033, -2.3428, 0.18414, -0.47177, -3.66395, 4.6801]
                actual_residuals = model.residuals
                expected_residuals.each_with_index do |value, idx|
                  assert_in_delta(value, actual_residuals[idx], 0.001)
                end
              end
            end
         | 
| @@ -0,0 +1,32 @@ | |
| 1 | 
            +
            require File.dirname(__FILE__)+'/../lib/statsample'
         | 
| 2 | 
            +
            require 'test/unit'
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            # Tests for Statsample::Reliability (ItemAnalysis and
            # ItemCharacteristicCurve) on a four-item scale.
            class StatsampleReliabilityTestCase < Test::Unit::TestCase
              # Runs the framework constructor, then builds the four-item dataset.
              def initialize(*args)
                super
                @x1 = [1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 30].to_vector(:scale)
                @x2 = [1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 50].to_vector(:scale)
                @x3 = [2, 2, 1, 1, 1, 2, 2, 2, 3, 4, 5, 40].to_vector(:scale)
                @x4 = [1, 2, 3, 4, 4, 4, 4, 3, 4, 4, 5, 30].to_vector(:scale)
                @ds = {'x1' => @x1, 'x2' => @x2, 'x3' => @x3, 'x4' => @x4}.to_dataset
              end

              # Alpha, standardized alpha, item-total correlation and the
              # "if deleted" sample variance for item x1.
              def test_general
                analysis = Statsample::Reliability::ItemAnalysis.new(@ds)
                assert_in_delta(0.980, analysis.alpha, 0.001)
                assert_in_delta(0.999, analysis.alpha_standarized, 0.001)
                assert_in_delta(0.999, analysis.item_total_correlation['x1'], 0.001)
                assert_in_delta(1050.455, analysis.stats_if_deleted['x1'][:variance_sample], 0.001)
              end

              # Only checks that the curve object can be constructed.
              # TODO: add real assertions; the commented probes below were used
              # to inspect the curves by hand.
              def test_icc
                # p @x1.factors
                icc = Statsample::Reliability::ItemCharacteristicCurve.new(@ds)
                # p icc.curve_field('x1',1).sort
                # p icc.curve_field('x1',2).sort
                # p icc.curve_field('x1',3).sort
                # p icc.curve_field('x1',30).sort
              end
            end
         | 
| @@ -0,0 +1,23 @@ | |
| 1 | 
            +
            require File.dirname(__FILE__)+'/../lib/statsample'
         | 
| 2 | 
            +
            require 'test/unit'
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            # Tests for Statsample::Resample (random sample generation helpers).
            class StatsampleResampleTestCase < Test::Unit::TestCase
              # Plain pass-through to the framework constructor.
              def initialize(*args)
                super
              end

              # generate(20, 1, 10) must return 20 values, none below 1 or above 10.
              def test_basic
                sample = Statsample::Resample.generate(20, 1, 10)
                assert_equal(20, sample.size)
                assert(sample.min >= 1)
                assert(sample.max <= 10)
              end

              # repeat_and_save collects one block result per repetition. Each result
              # here is how many 1s appear in a fresh sample; the final check is
              # statistical, so it depends on the random draws.
              def test_repeat_and_save
                counts = Statsample::Resample.repeat_and_save(400) do
                  Statsample::Resample.generate(20, 1, 10).count(1)
                end
                assert_equal(400, counts.size)
                vector = Statsample::Vector.new(counts, :scale)
                above_three = vector.count { |x| x > 3 }
                assert(above_three.between?(30, 70))
              end
            end
         | 
    
        data/test/test_srs.rb
    ADDED
    
    | @@ -0,0 +1,14 @@ | |
| 1 | 
            +
            require File.dirname(__FILE__)+'/../lib/statsample'
         | 
| 2 | 
            +
            require 'test/unit'
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            # Tests for Statsample::SRS (simple random sampling estimators).
            class StatsampleSrsTestCase < Test::Unit::TestCase
              # Sample-size estimation is only checked when HAS_GSL is true; the
              # proportion standard deviation check always runs.
              def test_std_error
                if HAS_GSL
                  n0 = Statsample::SRS.estimation_n0(0.05, 0.5, 0.95)
                  assert_equal(384, n0.to_i)
                  n = Statsample::SRS.estimation_n(0.05, 0.5, 150, 0.95)
                  assert_equal(108, n.to_i)
                else
                  puts "Statsample::SRS.estimation_n0 not tested (needs ruby-gsl)"
                end
                sd = Statsample::SRS.proportion_sd_kp_wor(0.5, 100, 150)
                assert_in_delta(0.0289, sd, 0.001)
              end
            end
         | 
| @@ -0,0 +1,152 @@ | |
| 1 | 
            +
            require File.dirname(__FILE__)+'/../lib/statsample'
         | 
| 2 | 
            +
            require 'test/unit'
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            # Unit tests for Statsample's chi-square test, bivariate association
            # measures, simple-random-sample estimators and simple linear regression.
            #
            # Tests that depend on ruby-gsl are guarded by HAS_GSL and print a notice
            # instead of failing when the library is not available.
            class StatsampleStatisicsTestCase < Test::Unit::TestCase
              # chi_square must reject non-matrix arguments with TypeError and yield
              # roughly 32.53 for the observed/expected matrices below.
              def test_chi_square
                assert_raise TypeError do
                  Statsample::Test.chi_square(1, 1)
                end
                real = Matrix[[95, 95], [45, 155]]
                expected = Matrix[[68, 122], [72, 128]]
                # Compute once: the value produced inside the block is the one asserted on.
                chi = nil
                assert_nothing_raised do
                  chi = Statsample::Test.chi_square(real, expected)
                end
                assert_in_delta(32.53, chi, 0.1)
              end

              # Sum of co-deviations of two equally sized scale vectors.
              def test_sum_of_codeviated
                v1 = [1, 2, 3, 4, 5, 6].to_vector(:scale)
                v2 = [6, 2, 4, 10, 12, 8].to_vector(:scale)
                assert_equal(23.0, Statsample::Bivariate.sum_of_codeviated(v1, v2))
              end

              # Pearson's r on complete data, then on vectors containing nils.
              def test_pearson
                v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:scale)
                v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:scale)
                assert_in_delta(0.525, Statsample::Bivariate.pearson(v1, v2), 0.001)
                # Dropping every position where either vector is nil leaves exactly
                # v1 and v2, so the coefficient must be unchanged.
                v3 = [6, 2,   1000, 1000, 5, 4, 7, 8, 4, 3, 2, nil].to_vector(:scale)
                v4 = [2, nil, nil,  nil,  3, 7, 8, 6, 4, 3, 2, 500].to_vector(:scale)
                assert_in_delta(0.525, Statsample::Bivariate.pearson(v3, v4), 0.001)
              end

              # correlation_matrix on a dataset must agree, cell by cell, with
              # pairwise Bivariate.pearson over the same vectors.
              def test_matrix_correlation
                v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:scale)
                v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:scale)
                v3 = [6, 2,   1000, 1000, 5, 4, 7, 8].to_vector(:scale)
                v4 = [2, nil, nil,  nil,  3, 7, 8, 6].to_vector(:scale)
                ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4 }.to_dataset

                vectors = [v1, v2, v3, v4]
                expected = Matrix.rows(
                  vectors.map do |row_vector|
                    vectors.map { |col_vector| Statsample::Bivariate.pearson(row_vector, col_vector) }
                  end
                )
                obt = Statsample::Bivariate.correlation_matrix(ds)

                # Compare element-wise with a tolerance rather than with assert_equal,
                # since the cells are floating point values.
                (0...expected.row_size).each do |i|
                  (0...expected.column_size).each do |j|
                    assert_in_delta(expected[i, j], obt[i, j], 0.0001, "#{expected[i,j].class}!=#{obt[i,j].class}  ")
                  end
                end
              end

              # Probability associated with a Pearson r, via its t statistic (needs GSL).
              def test_prop_pearson
                if HAS_GSL
                  assert_in_delta(0.42, Statsample::Bivariate.prop_pearson(Statsample::Bivariate.t_r(0.084, 94), 94), 0.01)
                  assert_in_delta(0.65, Statsample::Bivariate.prop_pearson(Statsample::Bivariate.t_r(0.046, 95), 95), 0.01)
                else
                  puts "Bivariate.prop_pearson not tested (no ruby-gsl)"
                end
              end

              # covariance and covariance_slow must agree on the same input (needs GSL).
              def test_covariance
                if HAS_GSL
                  v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:scale)
                  v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:scale)
                  assert_in_delta(Statsample::Bivariate.covariance(v1, v2), Statsample::Bivariate.covariance_slow(v1, v2), 0.001)
                else
                  # Report the skip, as the other GSL-guarded tests in this class do.
                  puts "Bivariate.covariance not tested (no ruby-gsl)"
                end
              end

              # Spearman rank correlation against a known reference value.
              def test_spearman
                v1 = [86, 97, 99, 100, 101, 103, 106, 110, 112, 113].to_vector(:scale)
                v2 = [0, 20, 28, 27, 50, 29, 7, 17, 6, 12].to_vector(:scale)
                assert_in_delta(-0.175758, Statsample::Bivariate.spearman(v1, v2), 0.0001)
              end

              # point_biserial raises TypeError for (c, d) and matches pearson for (d, c).
              def test_point_biserial
                c = [1, 3, 5, 6, 7, 100, 200, 300, 400, 300].to_vector(:scale)
                d = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0].to_vector(:scale)
                # Presumably the first argument has to be the two-valued vector;
                # passing the many-valued one first is expected to raise.
                assert_raise TypeError do
                  Statsample::Bivariate.point_biserial(c, d)
                end
                assert_in_delta(Statsample::Bivariate.point_biserial(d, c), Statsample::Bivariate.pearson(d, c), 0.0001)
              end

              # Kendall's tau-a on raw vectors and tau-b on their crosstab matrix.
              def test_tau
                v1 = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11].to_vector(:ordinal)
                v2 = [1, 3, 4, 5, 7, 8, 2, 9, 10, 6, 11].to_vector(:ordinal)
                assert_in_delta(0.6727, Statsample::Bivariate.tau_a(v1, v2), 0.001)
                assert_in_delta(0.6727, Statsample::Bivariate.tau_b(Statsample::Crosstab.new(v1, v2).to_matrix), 0.001)

                # Second data set, this one with repeated values in both vectors.
                v1 = [12, 14, 14, 17, 19, 19, 19, 19, 19, 20, 21, 21, 21, 21, 21, 22, 23, 24, 24, 24, 26, 26, 27].to_vector(:ordinal)
                v2 = [11, 4, 4, 2, 0, 0, 0, 0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0].to_vector(:ordinal)
                assert_in_delta(-0.376201540231705, Statsample::Bivariate.tau_b(Statsample::Crosstab.new(v1, v2).to_matrix), 0.001)
              end

              # Gamma computed from 2x3 and 3x4 contingency matrices.
              def test_gamma
                m = Matrix[[10, 5, 2], [10, 15, 20]]
                assert_in_delta(0.636, Statsample::Bivariate.gamma(m), 0.001)
                m2 = Matrix[[15, 12, 6, 5], [12, 8, 10, 8], [4, 6, 9, 10]]
                assert_in_delta(0.349, Statsample::Bivariate.gamma(m2), 0.001)
              end

              # Checks the sample fixture (size and sum) and, with GSL, the z-based
              # confidence interval of the mean for a population of 676 at 0.80.
              def test_estimation_mean
                v = ([42] * 23 + [41] * 4 + [36] * 1 + [32] * 1 + [29] * 1 + [27] * 2 + [23] * 1 + [19] * 1 + [16] * 2 + [15] * 2 + [14, 11, 10, 9, 7] + [6] * 3 + [5] * 2 + [4, 3]).to_vector(:scale)
                assert_equal(50, v.size)
                assert_equal(1471, v.sum)
                if HAS_GSL
                  limits = Statsample::SRS.mean_confidence_interval_z(v.mean, v.sds, v.size, 676, 0.80)
                  # NOTE(review): assumes the result is a [lower, upper] pair, like the
                  # value indexed in test_estimation_proportion; confirm against SRS.
                  # No reference values are available here, so only check that the
                  # interval brackets the sample mean.
                  assert(limits[0] < v.mean, "lower limit should be below the sample mean")
                  assert(limits[1] > v.mean, "upper limit should be above the sample mean")
                else
                  puts "SRS.mean_confidence_interval_z not tested (no ruby-gsl)"
                end
              end

              # Standard error of an estimated total and, with GSL, the z-based
              # confidence interval of a proportion.
              def test_estimation_proportion
                # total
                pop = 3042
                sam = 200
                prop = 0.19
                assert_in_delta(81.8, Statsample::SRS.proportion_total_sd_ep_wor(prop, sam, pop), 0.1)

                # confidence limits
                pop = 500
                sam = 100
                prop = 0.37
                a = 0.95
                if HAS_GSL
                  l = Statsample::SRS.proportion_confidence_interval_z(prop, sam, pop, a)
                  assert_in_delta(0.28, l[0], 0.01)
                  assert_in_delta(0.46, l[1], 0.01)
                else
                  puts "SRS.proportion_confidence_interval_z not tested (no ruby-gsl)"
                end
              end

              # Simple regression of b on a: sum-of-squares identity, agreement of r
              # with Bivariate.pearson, and reference coefficients.
              def test_simple_linear_regression
                a = [1, 2, 3, 4, 5, 6].to_vector(:scale)
                b = [6, 2, 4, 10, 12, 8].to_vector(:scale)
                reg = Statsample::Regression::SimpleRegression.new_from_vectors(a, b)
                # ssr + sse must add up to sst.
                assert_in_delta((reg.ssr + reg.sse).to_f, reg.sst, 0.001)
                assert_in_delta(Statsample::Bivariate.pearson(a, b), reg.r, 0.001)
                assert_in_delta(2.4, reg.a, 0.01)
                assert_in_delta(1.314, reg.b, 0.001)
                assert_in_delta(0.657, reg.r, 0.001)
                assert_in_delta(0.432, reg.r2, 0.001)
              end

              # NOTE(review): the name does not start with "test", so Test::Unit does
              # not pick this method up; it looks intentionally disabled. It only
              # builds the regression object and asserts nothing yet.
              def a_test_multiple_regression
                x1 = [1, 2, 3, 4, 5, 6].to_vector(:scale)
                x2 = [3, 5, 8, 9, 10, 20].to_vector(:scale)
                x3 = [100, 90, 50, 30, 50, 10].to_vector(:scale)
                y = [6, 2, 4, 10, 12, 8].to_vector(:scale)
                Statsample::Regression::MultipleRegression.new_from_vectors([x1, x2, x3], y)
              end
            end
         |