statsample 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +79 -0
- data/Manifest.txt +56 -0
- data/README.txt +77 -0
- data/Rakefile +22 -0
- data/bin/statsample +2 -0
- data/demo/benchmark.rb +52 -0
- data/demo/chi-square.rb +44 -0
- data/demo/dice.rb +13 -0
- data/demo/distribution_t.rb +95 -0
- data/demo/graph.rb +9 -0
- data/demo/item_analysis.rb +30 -0
- data/demo/mean.rb +81 -0
- data/demo/proportion.rb +57 -0
- data/demo/sample_test.csv +113 -0
- data/demo/strata_proportion.rb +152 -0
- data/demo/stratum.rb +141 -0
- data/lib/spss.rb +131 -0
- data/lib/statsample.rb +216 -0
- data/lib/statsample/anova.rb +74 -0
- data/lib/statsample/bivariate.rb +255 -0
- data/lib/statsample/chidistribution.rb +39 -0
- data/lib/statsample/codification.rb +120 -0
- data/lib/statsample/converters.rb +338 -0
- data/lib/statsample/crosstab.rb +122 -0
- data/lib/statsample/dataset.rb +526 -0
- data/lib/statsample/dominanceanalysis.rb +259 -0
- data/lib/statsample/dominanceanalysis/bootstrap.rb +126 -0
- data/lib/statsample/graph/gdchart.rb +45 -0
- data/lib/statsample/graph/svgboxplot.rb +108 -0
- data/lib/statsample/graph/svggraph.rb +181 -0
- data/lib/statsample/graph/svghistogram.rb +208 -0
- data/lib/statsample/graph/svgscatterplot.rb +111 -0
- data/lib/statsample/htmlreport.rb +232 -0
- data/lib/statsample/multiset.rb +281 -0
- data/lib/statsample/regression.rb +522 -0
- data/lib/statsample/reliability.rb +235 -0
- data/lib/statsample/resample.rb +20 -0
- data/lib/statsample/srs.rb +159 -0
- data/lib/statsample/test.rb +25 -0
- data/lib/statsample/vector.rb +759 -0
- data/test/_test_chart.rb +58 -0
- data/test/test_anova.rb +31 -0
- data/test/test_codification.rb +59 -0
- data/test/test_crosstab.rb +55 -0
- data/test/test_csv.csv +7 -0
- data/test/test_csv.rb +27 -0
- data/test/test_dataset.rb +293 -0
- data/test/test_ggobi.rb +42 -0
- data/test/test_multiset.rb +98 -0
- data/test/test_regression.rb +108 -0
- data/test/test_reliability.rb +32 -0
- data/test/test_resample.rb +23 -0
- data/test/test_srs.rb +14 -0
- data/test/test_statistics.rb +152 -0
- data/test/test_stratified.rb +19 -0
- data/test/test_svg_graph.rb +63 -0
- data/test/test_vector.rb +265 -0
- data/test/test_xls.rb +32 -0
- metadata +158 -0
data/test/test_ggobi.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
require File.dirname(__FILE__)+'/../lib/statsample'
|
2
|
+
require 'statsample/multiset'
|
3
|
+
require 'test/unit'
|
4
|
+
|
5
|
+
# Tests for the GGobi XML helpers exposed as Statsample::GGobi.
class StatsampleGGobiTestCase < Test::Unit::TestCase
  # Builds the shared fixture: a scale vector, a nominal vector (@v2, with
  # labels for "a" and "d") and an ordinal vector of 100 values each, plus
  # a dataset (@ds) holding all three.
  def initialize(*args)
    super
    scale_vector = ([10.2, 20.3, 10, 20, 30, 40, 30, 20, 30, 40] * 10).to_vector(:scale)
    @v2 = (%w{a b c a a a b b c d} * 10).to_vector(:nominal)
    @v2.labels = { "a" => "letter a", "d" => "letter d" }
    ordinal_vector = ([1, 2, 3, 4, 5, 4, 3, 2, 1, 2] * 10).to_vector(:ordinal)
    @ds = { 'v1' => scale_vector, 'v2' => @v2, 'v3' => ordinal_vector }.to_dataset
  end

  # A float, an integer and a string must each be wrapped in its own tag.
  def test_values_definition
    values = [1.0, 2, "a"]
    expected = "<real>1.0</real> <int>2</int> <string>a</string>"
    assert_equal(expected, Statsample::GGobi.values_definition(values))
  end

  # Checks the XML emitted for a nominal vector and the bookkeeping that
  # variable_definition leaves on the carrier object.
  def test_variable_definition
    carrier = OpenStruct.new(:categorials => [], :conversions => {})
    real_var_definition = Statsample::GGobi.variable_definition(carrier, @v2, 'variable 2', "v2")
    # Whitespace inside the heredoc matters: the comparison below maps every
    # whitespace character to a single space but does not collapse runs.
    expected = <<EOS
<categoricalvariable name="variable 2" nickname="v2">
<levels count="4">
<level value="1">letter a</level>
<level value="2">b</level>
<level value="3">c</level>
<level value="4">letter d</level></levels>
</categoricalvariable>
EOS
    normalized_expected = expected.gsub(/\s/, " ")
    normalized_actual = real_var_definition.gsub(/\s/, " ")
    assert_equal(normalized_expected, normalized_actual)

    expected_conversions = { 'variable 2' => { 'a' => 1, 'b' => 2, 'c' => 3, 'd' => 4 } }
    assert_equal(expected_conversions, carrier.conversions)
    assert_equal(['variable 2'], carrier.categorials)
  end

  # Smoke test only: passes as long as GGobi.out does not raise.
  # NOTE(review): `filename` is never used and the result is not asserted.
  def test_out
    filename = "/tmp/test_statsample_ggobi.xml"
    go = Statsample::GGobi.out(@ds)
  end
end
|
@@ -0,0 +1,98 @@
|
|
1
|
+
require File.dirname(__FILE__)+'/../lib/statsample.rb'
|
2
|
+
require 'statsample/multiset'
|
3
|
+
require 'test/unit'
|
4
|
+
|
5
|
+
# Tests for Statsample::Multiset (named collections of datasets sharing the
# same fields) and for Statsample::StratifiedSample built on top of it.
class StatsampleMultisetTestCase < Test::Unit::TestCase
  # Datasets added under a name must be retrievable under that name, and a
  # dataset whose fields differ from the multiset's must be rejected.
  def test_creation
    v1a = [1, 2, 3, 4, 5].to_vector
    v2a = [11, 21, 31, 41, 51].to_vector
    v3a = [21, 23, 34, 45, 56].to_vector
    ds1 = { 'v1' => v1a, 'v2' => v2a, 'v3' => v3a }.to_dataset

    v1b = [15, 25, 35, 45, 55].to_vector
    v2b = [11, 21, 31, 41, 51].to_vector
    v3b = [21, 23, 34, 45, 56].to_vector
    ds2 = { 'v1' => v1b, 'v2' => v2b, 'v3' => v3b }.to_dataset

    ms = Statsample::Multiset.new(['v1', 'v2', 'v3'])
    ms.add_dataset('ds1', ds1)
    ms.add_dataset('ds2', ds2)

    assert_equal(ds1, ms['ds1'])
    assert_equal(ds2, ms['ds2'])
    assert_equal(v1a, ms['ds1']['v1'])
    assert_not_equal(v1b, ms['ds1']['v1'])

    # ds3 lacks 'v3'. Pass both name and dataset so that the ArgumentError
    # can only come from the field check, not from a wrong argument count.
    ds3 = { 'v1' => v1b, 'v2' => v2b }.to_dataset
    assert_raise ArgumentError do
      ms.add_dataset('ds3', ds3)
    end
  end

  # new_empty_vectors(fields, names) must match a multiset assembled by hand
  # from empty datasets with the same fields and names.
  def test_creation_empty
    fields = %w{id age name}
    ms = Statsample::Multiset.new_empty_vectors(fields, %w{male female})
    ds_male = { 'id' => [].to_vector, 'age' => [].to_vector, 'name' => [].to_vector }.to_dataset(%w{id age name})
    ds_female = { 'id' => [].to_vector, 'age' => [].to_vector, 'name' => [].to_vector }.to_dataset(%w{id age name})

    ms2 = Statsample::Multiset.new(%w{id age name})
    ms2.add_dataset('male', ds_male)
    ms2.add_dataset('female', ds_female)

    assert_equal(ms2.fields, ms.fields)
    assert_equal(ms2['male'], ms['male'])
    assert_equal(ms2['female'], ms['female'])
  end

  # Splitting by one field yields one dataset per distinct value, keyed by
  # that value, with the cases kept in their original order.
  def test_to_multiset_by_split_one
    sex = %w{m m m m m f f f f m}.to_vector(:nominal)
    city = %w{London Paris NY London Paris NY London Paris NY Tome}.to_vector(:nominal)
    age = [10, 10, 20, 30, 34, 34, 33, 35, 36, 40].to_vector(:scale)
    ds = { 'sex' => sex, 'city' => city, 'age' => age }.to_dataset

    ms = ds.to_multiset_by_split('sex')

    assert_equal(2, ms.n_datasets)
    assert_equal(%w{f m}, ms.datasets.keys.sort)
    assert_equal(6, ms['m'].cases)
    assert_equal(4, ms['f'].cases)
    assert_equal(%w{London Paris NY London Paris Tome}, ms['m']['city'].to_a)
    assert_equal([34, 33, 35, 36], ms['f']['age'].to_a)
  end

  # Splitting by several fields keys each dataset by the array of values.
  # The fixture has 2 sexes x 2 cities x 2 hair colours = 8 combinations.
  def test_to_multiset_by_split_multiple
    sex = %w{m m m m m m m m m m f f f f f f f f f f}.to_vector(:nominal)
    city = %w{London London London Paris Paris London London London Paris Paris London London London Paris Paris London London London Paris Paris}.to_vector(:nominal)
    hair = %w{blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black}.to_vector(:nominal)
    age = [10, 10, 20, 30, 34, 34, 33, 35, 36, 40, 10, 10, 20, 30, 34, 34, 33, 35, 36, 40].to_vector(:scale)
    ds = { 'sex' => sex, 'city' => city, 'hair' => hair, 'age' => age }.to_dataset(%w{sex city hair age})

    ms = ds.to_multiset_by_split('sex', 'city', 'hair')

    assert_equal(8, ms.n_datasets)
    assert_equal(3, ms[%w{m London blonde}].cases)
    # Female rows 3, 7 and 8 (1-based, within the female half) are the
    # blonde Londoners; this also covers the second value of 'sex'.
    assert_equal(3, ms[%w{f London blonde}].cases)
    assert_equal(1, ms[%w{m Paris black}].cases)
  end

  # Proportion of a dichotomous item estimated over two strata with
  # population sizes 50 and 100.
  def test_stratum_proportion
    ds1 = { 'q1' => [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0].to_vector }.to_dataset
    ds2 = { 'q1' => [1, 1, 1, 1, 1, 1, 1, 0, 0].to_vector }.to_dataset
    assert_equal(5.0 / 12, ds1['q1'].proportion)
    assert_equal(7.0 / 9, ds2['q1'].proportion)

    ms = Statsample::Multiset.new(['q1'])
    ms.add_dataset('d1', ds1)
    ms.add_dataset('d2', ds2)
    ss = Statsample::StratifiedSample.new(ms, { 'd1' => 50, 'd2' => 100 })

    assert_in_delta(0.655, ss.proportion('q1'), 0.01)
    assert_in_delta(0.345, ss.proportion('q1', 0), 0.01)
  end

  # Sizes, mean and standard error for a scale variable over two strata of
  # 10000 units each, sampled with 18 cases per stratum.
  def test_stratum_scale
    boys = { 'test' => [50, 55, 60, 62, 62, 65, 67, 67, 70, 70, 73, 73, 75, 78, 78, 80, 85, 90].to_vector(:scale) }.to_dataset
    girls = { 'test' => [70, 70, 72, 72, 75, 75, 78, 78, 80, 80, 82, 82, 85, 85, 88, 88, 90, 90].to_vector(:scale) }.to_dataset

    ms = Statsample::Multiset.new(['test'])
    ms.add_dataset('boys', boys)
    ms.add_dataset('girls', girls)
    ss = Statsample::StratifiedSample.new(ms, { 'boys' => 10000, 'girls' => 10000 })

    assert_equal(2, ss.strata_number)
    assert_equal(20000, ss.population_size)
    assert_equal(10000, ss.stratum_size('boys'))
    assert_equal(10000, ss.stratum_size('girls'))
    assert_equal(36, ss.sample_size)
    assert_equal(75, ss.mean('test'))
    assert_in_delta(1.45, ss.standard_error_wor('test'), 0.01)
    # Both standard-error implementations must agree.
    assert_in_delta(ss.standard_error_wor('test'), ss.standard_error_wor_2('test'), 0.00001)
  end
end
|
@@ -0,0 +1,108 @@
|
|
1
|
+
require File.dirname(__FILE__)+'/../lib/statsample'
|
2
|
+
require 'test/unit'
|
3
|
+
# Tests for simple and multiple regression under Statsample::Regression.
class StatsampleRegressionTestCase < Test::Unit::TestCase
  # Prepares the simple-regression fixture (@x, @y, @reg) and then hands
  # over to the parent constructor.
  def initialize(*args)
    @x = [13, 20, 10, 33, 15].to_vector(:scale)
    @y = [23, 18, 35, 10, 27].to_vector(:scale)
    @reg = Statsample::Regression::SimpleRegression.new_from_vectors(@x, @y)
    super
  end

  # Intercept, slope and standard error of the simple regression fixture.
  def test_parameters
    assert_in_delta(40.009, @reg.a, 0.001)
    assert_in_delta(-0.957, @reg.b, 0.001)
    assert_in_delta(4.248, @reg.standard_error, 0.002)
  end

  # Pairwise multiple regression on vectors with nils in different
  # positions on each variable.
  def test_multiple_regression_pairwise_2
    @a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 3, nil, 3, nil, 3].to_vector(:scale)
    @b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4, 2, 2, nil, 6, 2].to_vector(:scale)
    @c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100, nil, 3, 7, nil, 7].to_vector(:scale)
    @y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 30, 40, nil, 50, nil].to_vector(:scale)
    dataset = { 'a' => @a, 'b' => @b, 'c' => @c, 'y' => @y }.to_dataset
    model = Statsample::Regression::MultipleRegressionPairwise.new(dataset, 'y')

    assert_in_delta(2407.436, model.sst, 0.001)
    assert_in_delta(0.752, model.r, 0.001)
    assert_in_delta(0.565, model.r2, 0.001)
    assert_in_delta(1361.130, model.ssr, 0.001)
    assert_in_delta(1046.306, model.sse, 0.001)
    assert_in_delta(3.035, model.f, 0.001)
  end

  # Alglib-backed multiple regression; only announces the skip when the
  # HAS_ALGIB constant (defined outside this file) is false.
  def test_multiple_regression_alglib
    unless HAS_ALGIB
      puts "Regression::MultipleRegressionAlglib not tested (no Alglib)"
      return
    end

    @a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7].to_vector(:scale)
    @b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4].to_vector(:scale)
    @c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100].to_vector(:scale)
    @y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30].to_vector(:scale)
    dataset = { 'a' => @a, 'b' => @b, 'c' => @c, 'y' => @y }.to_dataset
    model = Statsample::Regression::MultipleRegressionAlglib.new(dataset, 'y')
    model_test(model)

    expected_predicted = [1.7857, 6.0989, 3.2433, 7.2908, 4.9667, 10.3428, 8.8158, 10.4717, 23.6639, 25.3198]
    actual_predicted = model.predicted
    expected_predicted.each_with_index do |value, idx|
      assert_in_delta(value, actual_predicted[idx], 0.001)
    end

    expected_residuals = [1.2142, -2.0989, 1.7566, -1.29085, 2.033, -2.3428, 0.18414, -0.47177, -3.66395, 4.6801]
    actual_residuals = model.residuals
    expected_residuals.each_with_index do |value, idx|
      assert_in_delta(value, actual_residuals[idx], 0.001)
    end
  end

  # Shared assertions for a fitted model of y on a, b and c. Called from
  # the tests in this class with the model under test.
  def model_test(lr)
    assert_in_delta(0.695, lr.coeffs['a'], 0.001)
    assert_in_delta(11.027, lr.constant, 0.001)
    assert_in_delta(1.785, lr.process([1, 3, 11]), 0.001)

    expected_std_coeffs = { 'a' => 0.151, 'b' => -0.547, 'c' => 0.997 }
    actual_std_coeffs = lr.standarized_coeffs
    expected_std_coeffs.each do |field, value|
      assert_in_delta(value, actual_std_coeffs[field], 0.001)
    end

    assert_in_delta(639.6, lr.sst, 0.001)
    assert_in_delta(583.76, lr.ssr, 0.001)
    assert_in_delta(55.840, lr.sse, 0.001)
    assert_in_delta(0.955, lr.r, 0.001)
    assert_in_delta(0.913, lr.r2, 0.001)
    assert_in_delta(20.908, lr.f, 0.001)

    if HAS_GSL
      assert_in_delta(0.001, lr.significance, 0.001)
    else
      puts "#{lr.class}#significance not tested (not GSL)"
    end

    assert_in_delta(0.226, lr.tolerance("a"), 0.001)

    expected_se = { "a" => 1.171, "b" => 1.129, "c" => 0.072 }
    actual_se = lr.coeffs_se
    expected_se.each do |field, value|
      assert_in_delta(value, actual_se[field], 0.001)
    end

    expected_t = { "a" => 0.594, "b" => -3.796, "c" => 3.703 }
    actual_t = lr.coeffs_t
    expected_t.each do |field, value|
      assert_in_delta(value, actual_t[field], 0.001)
    end

    assert_in_delta(4.559, lr.constant_se, 0.001)
    assert_in_delta(2.419, lr.constant_t, 0.001)
  end

  # Pairwise regression on data whose first case is nil on every variable;
  # the expected predictions and residuals carry a nil in that position.
  def test_regression_pairwise
    @a = [nil, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7].to_vector(:scale)
    @b = [nil, 3, 3, 4, 4, 5, 5, 6, 6, 4, 4].to_vector(:scale)
    @c = [nil, 11, 22, 30, 40, 50, 65, 78, 79, 99, 100].to_vector(:scale)
    @y = [nil, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30].to_vector(:scale)
    dataset = { 'a' => @a, 'b' => @b, 'c' => @c, 'y' => @y }.to_dataset
    model = Statsample::Regression::MultipleRegressionPairwise.new(dataset, 'y')
    model_test(model)

    expected_predicted = [nil, 1.7857, 6.0989, 3.2433, 7.2908, 4.9667, 10.3428, 8.8158, 10.4717, 23.6639, 25.3198]
    actual_predicted = model.predicted
    expected_predicted.each_with_index do |value, idx|
      assert_in_delta(value, actual_predicted[idx], 0.001)
    end

    expected_residuals = [nil, 1.2142, -2.0989, 1.7566, -1.29085, 2.033, -2.3428, 0.18414, -0.47177, -3.66395, 4.6801]
    actual_residuals = model.residuals
    expected_residuals.each_with_index do |value, idx|
      assert_in_delta(value, actual_residuals[idx], 0.001)
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require File.dirname(__FILE__)+'/../lib/statsample'
|
2
|
+
require 'test/unit'
|
3
|
+
|
4
|
+
# Tests for Statsample::Reliability (item analysis and item
# characteristic curves).
class StatsampleReliabilityTestCase < Test::Unit::TestCase
  # Four 12-case scale items, each ending in one much larger value,
  # gathered into the dataset @ds.
  def initialize(*args)
    super
    @x1 = [1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 30].to_vector(:scale)
    @x2 = [1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 50].to_vector(:scale)
    @x3 = [2, 2, 1, 1, 1, 2, 2, 2, 3, 4, 5, 40].to_vector(:scale)
    @x4 = [1, 2, 3, 4, 4, 4, 4, 3, 4, 4, 5, 30].to_vector(:scale)
    @ds = { 'x1' => @x1, 'x2' => @x2, 'x3' => @x3, 'x4' => @x4 }.to_dataset
  end

  # Alpha, standardized alpha, item-total correlation and the
  # "if item deleted" sample variance for item x1.
  def test_general
    analysis = Statsample::Reliability::ItemAnalysis.new(@ds)
    assert_in_delta(0.980, analysis.alpha, 0.001)
    assert_in_delta(0.999, analysis.alpha_standarized, 0.001)
    assert_in_delta(0.999, analysis.item_total_correlation['x1'], 0.001)
    assert_in_delta(1050.455, analysis.stats_if_deleted['x1'][:variance_sample], 0.001)
  end

  # Only checks that the curve object can be constructed.
  def test_icc
    # p @x1.factors
    icc = Statsample::Reliability::ItemCharacteristicCurve.new(@ds)
    # Need to create the test!!!!
    # p icc.curve_field('x1',1).sort
    # p icc.curve_field('x1',2).sort
    # p icc.curve_field('x1',3).sort
    # p icc.curve_field('x1',30).sort
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require File.dirname(__FILE__)+'/../lib/statsample'
|
2
|
+
require 'test/unit'
|
3
|
+
|
4
|
+
# Tests for the random resampling helpers in Statsample::Resample.
class StatsampleResampleTestCase < Test::Unit::TestCase
  def initialize(*args)
    super
  end

  # generate(20, 1, 10) must return 20 values, all within 1..10.
  def test_basic
    sample = Statsample::Resample.generate(20, 1, 10)
    assert_equal(20, sample.size)
    assert(sample.min >= 1)
    assert(sample.max <= 10)
  end

  # Repeats a draw 400 times, recording how many 1s each draw contains,
  # then checks how many runs had more than three 1s.
  # Statistical test: the accepted 30..70 band may occasionally be missed.
  def test_repeat_and_save
    ones_per_run = Statsample::Resample.repeat_and_save(400) do
      Statsample::Resample.generate(20, 1, 10).count(1)
    end
    assert_equal(400, ones_per_run.size)

    vector = Statsample::Vector.new(ones_per_run, :scale)
    runs_above_three = vector.count { |ones| ones > 3 }
    assert(runs_above_three >= 30 && runs_above_three <= 70)
  end
end
|
data/test/test_srs.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require File.dirname(__FILE__)+'/../lib/statsample'
|
2
|
+
require 'test/unit'
|
3
|
+
|
4
|
+
# Tests for the simple random sampling formulas in Statsample::SRS.
class StatsampleSrsTestCase < Test::Unit::TestCase
  # Sample-size estimation is asserted only when HAS_GSL is true; the
  # proportion standard deviation is asserted unconditionally.
  def test_std_error
    if HAS_GSL
      n0 = Statsample::SRS.estimation_n0(0.05, 0.5, 0.95)
      assert_equal(384, n0.to_i)
      n = Statsample::SRS.estimation_n(0.05, 0.5, 150, 0.95)
      assert_equal(108, n.to_i)
    else
      puts "Statsample::SRS.estimation_n0 not tested (needs ruby-gsl)"
    end

    sd = Statsample::SRS.proportion_sd_kp_wor(0.5, 100, 150)
    assert_in_delta(0.0289, sd, 0.001)
  end
end
|
@@ -0,0 +1,152 @@
|
|
1
|
+
require File.dirname(__FILE__)+'/../lib/statsample'
|
2
|
+
require 'test/unit'
|
3
|
+
|
4
|
+
# Tests for assorted statistics: chi-square, bivariate measures
# (Statsample::Bivariate), SRS estimation and simple linear regression.
class StatsampleStatisicsTestCase < Test::Unit::TestCase
  def initialize(*args)
    super
  end

  # chi_square must reject plain numbers with TypeError and accept two
  # matrices of observed and expected counts.
  def test_chi_square
    assert_raise TypeError do
      Statsample::Test.chi_square(1, 1)
    end

    real = Matrix[[95, 95], [45, 155]]
    expected = Matrix[[68, 122], [72, 128]]
    assert_nothing_raised do
      Statsample::Test.chi_square(real, expected)
    end

    chi = Statsample::Test.chi_square(real, expected)
    assert_in_delta(32.53, chi, 0.1)
  end

  def test_sum_of_codeviated
    first = [1, 2, 3, 4, 5, 6].to_vector(:scale)
    second = [6, 2, 4, 10, 12, 8].to_vector(:scale)
    assert_equal(23.0, Statsample::Bivariate.sum_of_codeviated(first, second))
  end

  # The second pair of vectors embeds the first pair among cases that have
  # a nil on one side; the expected coefficient is the same.
  def test_pearson
    clean_x = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:scale)
    clean_y = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:scale)
    assert_in_delta(0.525, Statsample::Bivariate.pearson(clean_x, clean_y), 0.001)

    gappy_x = [6, 2, 1000, 1000, 5, 4, 7, 8, 4, 3, 2, nil].to_vector(:scale)
    gappy_y = [2, nil, nil, nil, 3, 7, 8, 6, 4, 3, 2, 500].to_vector(:scale)
    assert_in_delta(0.525, Statsample::Bivariate.pearson(gappy_x, gappy_y), 0.001)
  end

  # Every cell of correlation_matrix(ds) must equal the pearson coefficient
  # of the corresponding pair of vectors.
  def test_matrix_correlation
    v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:scale)
    v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:scale)
    v3 = [6, 2, 1000, 1000, 5, 4, 7, 8].to_vector(:scale)
    v4 = [2, nil, nil, nil, 3, 7, 8, 6].to_vector(:scale)
    ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4 }.to_dataset

    # Build the expected matrix row by row, in v1..v4 order on both axes.
    vectors = [v1, v2, v3, v4]
    rows = vectors.map do |row_vector|
      vectors.map { |col_vector| Statsample::Bivariate.pearson(row_vector, col_vector) }
    end
    expected = Matrix[*rows]

    obt = Statsample::Bivariate.correlation_matrix(ds)
    expected.row_size.times do |i|
      expected.column_size.times do |j|
        # puts expected[i,j].inspect
        # puts obt[i,j].inspect
        assert_in_delta(expected[i, j], obt[i, j], 0.0001, "#{expected[i,j].class}!=#{obt[i,j].class} ")
      end
    end
    # assert_equal(expected,obt)
  end

  # Asserted only when HAS_GSL is true; otherwise announces the skip.
  def test_prop_pearson
    unless HAS_GSL
      puts "Bivariate.prop_pearson not tested (no ruby-gsl)"
      return
    end

    t_first = Statsample::Bivariate.t_r(0.084, 94)
    assert_in_delta(0.42, Statsample::Bivariate.prop_pearson(t_first, 94), 0.01)
    t_second = Statsample::Bivariate.t_r(0.046, 95)
    assert_in_delta(0.65, Statsample::Bivariate.prop_pearson(t_second, 95), 0.01)
  end

  # covariance and covariance_slow must agree; runs only when HAS_GSL is
  # true and is silently skipped otherwise.
  def test_covariance
    return unless HAS_GSL

    v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:scale)
    v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:scale)
    fast = Statsample::Bivariate.covariance(v1, v2)
    slow = Statsample::Bivariate.covariance_slow(v1, v2)
    assert_in_delta(fast, slow, 0.001)
  end

  def test_spearman
    v1 = [86, 97, 99, 100, 101, 103, 106, 110, 112, 113].to_vector(:scale)
    v2 = [0, 20, 28, 27, 50, 29, 7, 17, 6, 12].to_vector(:scale)
    assert_in_delta(-0.175758, Statsample::Bivariate.spearman(v1, v2), 0.0001)
  end

  # point_biserial(continuous, dichotomous) must raise TypeError; with the
  # arguments swapped it must equal pearson on the same pair.
  def test_point_biserial
    continuous = [1, 3, 5, 6, 7, 100, 200, 300, 400, 300].to_vector(:scale)
    dichotomous = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0].to_vector(:scale)

    assert_raise TypeError do
      Statsample::Bivariate.point_biserial(continuous, dichotomous)
    end

    biserial = Statsample::Bivariate.point_biserial(dichotomous, continuous)
    pearson = Statsample::Bivariate.pearson(dichotomous, continuous)
    assert_in_delta(biserial, pearson, 0.0001)
  end

  # tau_a works on the vectors directly; tau_b on their crosstab matrix.
  def test_tau
    v1 = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11].to_vector(:ordinal)
    v2 = [1, 3, 4, 5, 7, 8, 2, 9, 10, 6, 11].to_vector(:ordinal)
    assert_in_delta(0.6727, Statsample::Bivariate.tau_a(v1, v2), 0.001)
    crosstab = Statsample::Crosstab.new(v1, v2).to_matrix
    assert_in_delta(0.6727, Statsample::Bivariate.tau_b(crosstab), 0.001)

    v1 = [12, 14, 14, 17, 19, 19, 19, 19, 19, 20, 21, 21, 21, 21, 21, 22, 23, 24, 24, 24, 26, 26, 27].to_vector(:ordinal)
    v2 = [11, 4, 4, 2, 0, 0, 0, 0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0].to_vector(:ordinal)
    crosstab = Statsample::Crosstab.new(v1, v2).to_matrix
    assert_in_delta(-0.376201540231705, Statsample::Bivariate.tau_b(crosstab), 0.001)
  end

  def test_gamma
    small = Matrix[[10, 5, 2], [10, 15, 20]]
    assert_in_delta(0.636, Statsample::Bivariate.gamma(small), 0.001)
    large = Matrix[[15, 12, 6, 5], [12, 8, 10, 8], [4, 6, 9, 10]]
    assert_in_delta(0.349, Statsample::Bivariate.gamma(large), 0.001)
  end

  # Checks the fixture's size and sum. The confidence interval is computed
  # (when HAS_GSL is true) but its value is not asserted.
  def test_estimation_mean
    v = ([42] * 23 + [41] * 4 + [36] * 1 + [32] * 1 + [29] * 1 + [27] * 2 + [23] * 1 + [19] * 1 + [16] * 2 + [15] * 2 + [14, 11, 10, 9, 7] + [6] * 3 + [5] * 2 + [4, 3]).to_vector(:scale)
    assert_equal(50, v.size)
    assert_equal(1471, v.sum)

    if HAS_GSL
      limits = Statsample::SRS.mean_confidence_interval_z(v.mean, v.sds, v.size, 676, 0.80)
    else
      puts "SRS.mean_confidence_interval_z not tested (no ruby-gsl)"
    end
  end

  # Standard deviation of an estimated total, then (when HAS_GSL is true)
  # the confidence limits of a proportion.
  def test_estimation_proportion
    # total
    population = 3042
    sample = 200
    proportion = 0.19
    total_sd = Statsample::SRS.proportion_total_sd_ep_wor(proportion, sample, population)
    assert_in_delta(81.8, total_sd, 0.1)

    # confidence limits
    population = 500
    sample = 100
    proportion = 0.37
    confidence = 0.95
    if HAS_GSL
      limits = Statsample::SRS.proportion_confidence_interval_z(proportion, sample, population, confidence)
      assert_in_delta(0.28, limits[0], 0.01)
      assert_in_delta(0.46, limits[1], 0.01)
    else
      puts "SRS.proportion_confidence_interval_z not tested (no ruby-gsl)"
    end
  end

  # ssr + sse must add up to sst, r must match pearson, and the fitted
  # parameters must match the expected values.
  def test_simple_linear_regression
    x = [1, 2, 3, 4, 5, 6].to_vector(:scale)
    y = [6, 2, 4, 10, 12, 8].to_vector(:scale)
    reg = Statsample::Regression::SimpleRegression.new_from_vectors(x, y)

    assert_in_delta((reg.ssr + reg.sse).to_f, reg.sst, 0.001)
    assert_in_delta(Statsample::Bivariate.pearson(x, y), reg.r, 0.001)
    assert_in_delta(2.4, reg.a, 0.01)
    assert_in_delta(1.314, reg.b, 0.001)
    assert_in_delta(0.657, reg.r, 0.001)
    assert_in_delta(0.432, reg.r2, 0.001)
  end

  # Not picked up as a test: the name does not start with "test_".
  def a_test_multiple_regression
    x1 = [1, 2, 3, 4, 5, 6].to_vector(:scale)
    x2 = [3, 5, 8, 9, 10, 20].to_vector(:scale)
    x3 = [100, 90, 50, 30, 50, 10].to_vector(:scale)
    y = [6, 2, 4, 10, 12, 8].to_vector(:scale)
    reg = Statsample::Regression::MultipleRegression.new_from_vectors([x1, x2, x3], y)
    # p reg
  end
end
|