statsample 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +79 -0
- data/Manifest.txt +56 -0
- data/README.txt +77 -0
- data/Rakefile +22 -0
- data/bin/statsample +2 -0
- data/demo/benchmark.rb +52 -0
- data/demo/chi-square.rb +44 -0
- data/demo/dice.rb +13 -0
- data/demo/distribution_t.rb +95 -0
- data/demo/graph.rb +9 -0
- data/demo/item_analysis.rb +30 -0
- data/demo/mean.rb +81 -0
- data/demo/proportion.rb +57 -0
- data/demo/sample_test.csv +113 -0
- data/demo/strata_proportion.rb +152 -0
- data/demo/stratum.rb +141 -0
- data/lib/spss.rb +131 -0
- data/lib/statsample.rb +216 -0
- data/lib/statsample/anova.rb +74 -0
- data/lib/statsample/bivariate.rb +255 -0
- data/lib/statsample/chidistribution.rb +39 -0
- data/lib/statsample/codification.rb +120 -0
- data/lib/statsample/converters.rb +338 -0
- data/lib/statsample/crosstab.rb +122 -0
- data/lib/statsample/dataset.rb +526 -0
- data/lib/statsample/dominanceanalysis.rb +259 -0
- data/lib/statsample/dominanceanalysis/bootstrap.rb +126 -0
- data/lib/statsample/graph/gdchart.rb +45 -0
- data/lib/statsample/graph/svgboxplot.rb +108 -0
- data/lib/statsample/graph/svggraph.rb +181 -0
- data/lib/statsample/graph/svghistogram.rb +208 -0
- data/lib/statsample/graph/svgscatterplot.rb +111 -0
- data/lib/statsample/htmlreport.rb +232 -0
- data/lib/statsample/multiset.rb +281 -0
- data/lib/statsample/regression.rb +522 -0
- data/lib/statsample/reliability.rb +235 -0
- data/lib/statsample/resample.rb +20 -0
- data/lib/statsample/srs.rb +159 -0
- data/lib/statsample/test.rb +25 -0
- data/lib/statsample/vector.rb +759 -0
- data/test/_test_chart.rb +58 -0
- data/test/test_anova.rb +31 -0
- data/test/test_codification.rb +59 -0
- data/test/test_crosstab.rb +55 -0
- data/test/test_csv.csv +7 -0
- data/test/test_csv.rb +27 -0
- data/test/test_dataset.rb +293 -0
- data/test/test_ggobi.rb +42 -0
- data/test/test_multiset.rb +98 -0
- data/test/test_regression.rb +108 -0
- data/test/test_reliability.rb +32 -0
- data/test/test_resample.rb +23 -0
- data/test/test_srs.rb +14 -0
- data/test/test_statistics.rb +152 -0
- data/test/test_stratified.rb +19 -0
- data/test/test_svg_graph.rb +63 -0
- data/test/test_vector.rb +265 -0
- data/test/test_xls.rb +32 -0
- metadata +158 -0
data/test/test_ggobi.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
require File.dirname(__FILE__)+'/../lib/statsample'
|
2
|
+
require 'statsample/multiset'
|
3
|
+
require 'test/unit'
|
4
|
+
|
5
|
+
# Tests for Statsample::GGobi, which turns values, vectors and datasets into
# the XML fragments asserted below.
class StatsampleGGobiTestCase < Test::Unit::TestCase
  # Builds the fixtures shared by the tests: a nominal vector with labels for
  # two of its values (@v2) and a dataset (@ds) that combines it with a scale
  # and an ordinal vector. Test::Unit calls this hook before every test, so
  # there is no need to override the constructor.
  def setup
    v1 = ([10.2, 20.3, 10, 20, 30, 40, 30, 20, 30, 40] * 10).to_vector(:scale)
    @v2 = (%w{a b c a a a b b c d} * 10).to_vector(:nominal)
    @v2.labels = { "a" => "letter a", "d" => "letter d" }
    v3 = ([1, 2, 3, 4, 5, 4, 3, 2, 1, 2] * 10).to_vector(:ordinal)
    @ds = { 'v1' => v1, 'v2' => @v2, 'v3' => v3 }.to_dataset
  end

  # Each value must be wrapped in a tag named after its type.
  def test_values_definition
    values = [1.0, 2, "a"]
    assert_equal("<real>1.0</real> <int>2</int> <string>a</string>",
                 Statsample::GGobi.values_definition(values))
  end

  # A nominal vector must be rendered as a categorical variable whose levels
  # use the vector labels when present and the raw value otherwise. The call
  # must also record the value-to-level mapping and the variable name on the
  # carrier object it receives.
  def test_variable_definition
    carrier = OpenStruct.new
    carrier.categorials = []
    carrier.conversions = {}
    real_var_definition = Statsample::GGobi.variable_definition(carrier, @v2, 'variable 2', "v2")
    expected = <<EOS
<categoricalvariable name="variable 2" nickname="v2">
<levels count="4">
<level value="1">letter a</level>
<level value="2">b</level>
<level value="3">c</level>
<level value="4">letter d</level></levels>
</categoricalvariable>
EOS
    # Every whitespace character is mapped to a plain space on both sides, so
    # newlines versus spaces do not matter for the comparison.
    assert_equal(expected.gsub(/\s/, " "), real_var_definition.gsub(/\s/, " "))
    assert_equal({ 'variable 2' => { 'a' => 1, 'b' => 2, 'c' => 3, 'd' => 4 } }, carrier.conversions)
    assert_equal(['variable 2'], carrier.categorials)
  end

  # Smoke test: exporting the whole dataset must not raise.
  # NOTE(review): the output itself is not checked; add content assertions
  # once a reference document is available.
  def test_out
    assert_nothing_raised do
      Statsample::GGobi.out(@ds)
    end
  end
end
|
@@ -0,0 +1,98 @@
|
|
1
|
+
require File.dirname(__FILE__)+'/../lib/statsample.rb'
|
2
|
+
require 'statsample/multiset'
|
3
|
+
require 'test/unit'
|
4
|
+
|
5
|
+
# Tests for Statsample::Multiset (a keyed collection of datasets sharing the
# same fields) and for Statsample::StratifiedSample built on top of it.
class StatsampleMultisetTestCase < Test::Unit::TestCase
  # Datasets added under a key must be retrievable under that key, and a
  # dataset whose fields differ from the multiset's must be rejected.
  def test_creation
    v1a = [1, 2, 3, 4, 5].to_vector
    v2a = [11, 21, 31, 41, 51].to_vector
    v3a = [21, 23, 34, 45, 56].to_vector
    ds1 = { 'v1' => v1a, 'v2' => v2a, 'v3' => v3a }.to_dataset
    v1b = [15, 25, 35, 45, 55].to_vector
    v2b = [11, 21, 31, 41, 51].to_vector
    v3b = [21, 23, 34, 45, 56].to_vector
    ds2 = { 'v1' => v1b, 'v2' => v2b, 'v3' => v3b }.to_dataset
    ms = Statsample::Multiset.new(['v1', 'v2', 'v3'])
    ms.add_dataset('ds1', ds1)
    ms.add_dataset('ds2', ds2)
    assert_equal(ds1, ms['ds1'])
    assert_equal(ds2, ms['ds2'])
    assert_equal(v1a, ms['ds1']['v1'])
    assert_not_equal(v1b, ms['ds1']['v1'])
    ds3 = { 'v1' => v1b, 'v2' => v2b }.to_dataset
    # Pass the key as well as the dataset: with a single argument the
    # ArgumentError would come from the wrong argument count and the field
    # check would never be exercised.
    # NOTE(review): assumes add_dataset raises ArgumentError on a field
    # mismatch -- confirm against lib/statsample/multiset.rb.
    assert_raise ArgumentError do
      ms.add_dataset('ds3', ds3)
    end
  end

  # new_empty_vectors must be equivalent to adding one empty dataset per name
  # by hand.
  def test_creation_empty
    ms = Statsample::Multiset.new_empty_vectors(%w{id age name}, %w{male female})
    ds_male = { 'id' => [].to_vector, 'age' => [].to_vector, 'name' => [].to_vector }.to_dataset(%w{id age name})
    ds_female = { 'id' => [].to_vector, 'age' => [].to_vector, 'name' => [].to_vector }.to_dataset(%w{id age name})
    ms2 = Statsample::Multiset.new(%w{id age name})
    ms2.add_dataset('male', ds_male)
    ms2.add_dataset('female', ds_female)
    assert_equal(ms2.fields, ms.fields)
    assert_equal(ms2['male'], ms['male'])
    assert_equal(ms2['female'], ms['female'])
  end

  # Splitting by a single field yields one dataset per distinct value, with
  # the cases kept in their original order.
  def test_to_multiset_by_split_one
    sex = %w{m m m m m f f f f m}.to_vector(:nominal)
    city = %w{London Paris NY London Paris NY London Paris NY Tome}.to_vector(:nominal)
    age = [10, 10, 20, 30, 34, 34, 33, 35, 36, 40].to_vector(:scale)
    ds = { 'sex' => sex, 'city' => city, 'age' => age }.to_dataset
    ms = ds.to_multiset_by_split('sex')
    assert_equal(2, ms.n_datasets)
    assert_equal(%w{f m}, ms.datasets.keys.sort)
    assert_equal(6, ms['m'].cases)
    assert_equal(4, ms['f'].cases)
    assert_equal(%w{London Paris NY London Paris Tome}, ms['m']['city'].to_a)
    assert_equal([34, 33, 35, 36], ms['f']['age'].to_a)
  end

  # Splitting by several fields yields one dataset per observed combination,
  # keyed by the array of values.
  def test_to_multiset_by_split_multiple
    sex = %w{m m m m m m m m m m f f f f f f f f f f}.to_vector(:nominal)
    city = %w{London London London Paris Paris London London London Paris Paris London London London Paris Paris London London London Paris Paris}.to_vector(:nominal)
    hair = %w{blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black}.to_vector(:nominal)
    age = [10, 10, 20, 30, 34, 34, 33, 35, 36, 40, 10, 10, 20, 30, 34, 34, 33, 35, 36, 40].to_vector(:scale)
    ds = { 'sex' => sex, 'city' => city, 'hair' => hair, 'age' => age }.to_dataset(%w{sex city hair age})
    ms = ds.to_multiset_by_split('sex', 'city', 'hair')
    assert_equal(8, ms.n_datasets)
    assert_equal(3, ms[%w{m London blonde}].cases)
    # The original repeated the assertion above; check a cell from the other
    # sex instead (cases 13, 17 and 18 of the fixture, counting from 1).
    assert_equal(3, ms[%w{f London blonde}].cases)
    assert_equal(1, ms[%w{m Paris black}].cases)
  end

  # Proportion estimated from two strata with population sizes 50 and 100.
  def test_stratum_proportion
    ds1 = { 'q1' => [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0].to_vector }.to_dataset
    ds2 = { 'q1' => [1, 1, 1, 1, 1, 1, 1, 0, 0].to_vector }.to_dataset
    assert_equal(5.0 / 12, ds1['q1'].proportion)
    assert_equal(7.0 / 9, ds2['q1'].proportion)
    ms = Statsample::Multiset.new(['q1'])
    ms.add_dataset('d1', ds1)
    ms.add_dataset('d2', ds2)
    ss = Statsample::StratifiedSample.new(ms, { 'd1' => 50, 'd2' => 100 })
    assert_in_delta(0.655, ss.proportion('q1'), 0.01)
    assert_in_delta(0.345, ss.proportion('q1', 0), 0.01)
  end

  # Sizes, mean and standard error for a scale variable over two strata of
  # 10000 units each.
  def test_stratum_scale
    boys = { 'test' => [50, 55, 60, 62, 62, 65, 67, 67, 70, 70, 73, 73, 75, 78, 78, 80, 85, 90].to_vector(:scale) }.to_dataset
    girls = { 'test' => [70, 70, 72, 72, 75, 75, 78, 78, 80, 80, 82, 82, 85, 85, 88, 88, 90, 90].to_vector(:scale) }.to_dataset
    ms = Statsample::Multiset.new(['test'])
    ms.add_dataset('boys', boys)
    ms.add_dataset('girls', girls)
    ss = Statsample::StratifiedSample.new(ms, { 'boys' => 10000, 'girls' => 10000 })
    assert_equal(2, ss.strata_number)
    assert_equal(20000, ss.population_size)
    assert_equal(10000, ss.stratum_size('boys'))
    assert_equal(10000, ss.stratum_size('girls'))
    assert_equal(36, ss.sample_size)
    assert_equal(75, ss.mean('test'))
    assert_in_delta(1.45, ss.standard_error_wor('test'), 0.01)
    # Both standard-error implementations must agree.
    assert_in_delta(ss.standard_error_wor('test'), ss.standard_error_wor_2('test'), 0.00001)
  end
end
|
@@ -0,0 +1,108 @@
|
|
1
|
+
require File.dirname(__FILE__)+'/../lib/statsample'
|
2
|
+
require 'test/unit'
|
3
|
+
# Tests for the simple and multiple regression classes under
# Statsample::Regression.
class StatsampleRegressionTestCase < Test::Unit::TestCase
  # Fits the simple regression used by test_parameters before handing the
  # arguments on to Test::Unit.
  def initialize(*args)
    @x = [13, 20, 10, 33, 15].to_vector(:scale)
    @y = [23, 18, 35, 10, 27].to_vector(:scale)
    @reg = Statsample::Regression::SimpleRegression.new_from_vectors(@x, @y)
    super
  end

  # Checks a, b and standard_error of the simple regression fitted above.
  def test_parameters
    assert_in_delta(40.009, @reg.a, 0.001)
    assert_in_delta(-0.957, @reg.b, 0.001)
    assert_in_delta(4.248, @reg.standard_error, 0.002)
  end

  # Pairwise multiple regression over vectors with nils scattered across
  # different cases.
  def test_multiple_regression_pairwise_2
    @a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 3, nil, 3, nil, 3].to_vector(:scale)
    @b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4, 2, 2, nil, 6, 2].to_vector(:scale)
    @c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100, nil, 3, 7, nil, 7].to_vector(:scale)
    @y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 30, 40, nil, 50, nil].to_vector(:scale)
    dataset = { 'a' => @a, 'b' => @b, 'c' => @c, 'y' => @y }.to_dataset
    model = Statsample::Regression::MultipleRegressionPairwise.new(dataset, 'y')
    assert_in_delta(2407.436, model.sst, 0.001)
    assert_in_delta(0.752, model.r, 0.001)
    assert_in_delta(0.565, model.r2, 0.001)
    assert_in_delta(1361.130, model.ssr, 0.001)
    assert_in_delta(1046.306, model.sse, 0.001)
    assert_in_delta(3.035, model.f, 0.001)
  end

  # Alglib-backed multiple regression; reports and skips when the HAS_ALGIB
  # flag is false.
  def test_multiple_regression_alglib
    unless HAS_ALGIB
      puts "Regression::MultipleRegressionAlglib not tested (no Alglib)"
      return
    end

    @a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7].to_vector(:scale)
    @b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4].to_vector(:scale)
    @c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100].to_vector(:scale)
    @y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30].to_vector(:scale)
    dataset = { 'a' => @a, 'b' => @b, 'c' => @c, 'y' => @y }.to_dataset
    model = Statsample::Regression::MultipleRegressionAlglib.new(dataset, 'y')
    model_test(model)

    expected_predicted = [1.7857, 6.0989, 3.2433, 7.2908, 4.9667, 10.3428, 8.8158, 10.4717, 23.6639, 25.3198]
    actual_predicted = model.predicted
    expected_predicted.each_with_index do |value, idx|
      assert_in_delta(value, actual_predicted[idx], 0.001)
    end

    expected_residuals = [1.2142, -2.0989, 1.7566, -1.29085, 2.033, -2.3428, 0.18414, -0.47177, -3.66395, 4.6801]
    actual_residuals = model.residuals
    expected_residuals.each_with_index do |value, idx|
      assert_in_delta(value, actual_residuals[idx], 0.001)
    end
  end

  # Shared assertions for any multiple-regression object fitted on the
  # a/b/c -> y fixture used by the alglib and pairwise tests. Not collected
  # as a test itself because its name does not start with "test".
  def model_test(lr)
    assert_in_delta(0.695, lr.coeffs['a'], 0.001)
    assert_in_delta(11.027, lr.constant, 0.001)
    assert_in_delta(1.785, lr.process([1, 3, 11]), 0.001)

    expected_std_coeffs = { 'a' => 0.151, 'b' => -0.547, 'c' => 0.997 }
    actual_std_coeffs = lr.standarized_coeffs
    expected_std_coeffs.each_pair do |field, value|
      assert_in_delta(value, actual_std_coeffs[field], 0.001)
    end

    assert_in_delta(639.6, lr.sst, 0.001)
    assert_in_delta(583.76, lr.ssr, 0.001)
    assert_in_delta(55.840, lr.sse, 0.001)
    assert_in_delta(0.955, lr.r, 0.001)
    assert_in_delta(0.913, lr.r2, 0.001)
    assert_in_delta(20.908, lr.f, 0.001)

    # The significance check is gated on the HAS_GSL flag.
    if HAS_GSL
      assert_in_delta(0.001, lr.significance, 0.001)
    else
      puts "#{lr.class}#significance not tested (not GSL)"
    end

    assert_in_delta(0.226, lr.tolerance("a"), 0.001)

    expected_se = { "a" => 1.171, "b" => 1.129, "c" => 0.072 }
    actual_se = lr.coeffs_se
    expected_se.each_pair do |field, value|
      assert_in_delta(value, actual_se[field], 0.001)
    end

    expected_t = { "a" => 0.594, "b" => -3.796, "c" => 3.703 }
    actual_t = lr.coeffs_t
    expected_t.each_pair do |field, value|
      assert_in_delta(value, actual_t[field], 0.001)
    end

    assert_in_delta(4.559, lr.constant_se, 0.001)
    assert_in_delta(2.419, lr.constant_t, 0.001)
  end

  # Same fixture as the alglib test with an all-nil first case prepended;
  # the expected predictions and residuals carry a nil in that position.
  def test_regression_pairwise
    @a = [nil, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7].to_vector(:scale)
    @b = [nil, 3, 3, 4, 4, 5, 5, 6, 6, 4, 4].to_vector(:scale)
    @c = [nil, 11, 22, 30, 40, 50, 65, 78, 79, 99, 100].to_vector(:scale)
    @y = [nil, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30].to_vector(:scale)
    dataset = { 'a' => @a, 'b' => @b, 'c' => @c, 'y' => @y }.to_dataset
    model = Statsample::Regression::MultipleRegressionPairwise.new(dataset, 'y')
    model_test(model)

    expected_predicted = [nil, 1.7857, 6.0989, 3.2433, 7.2908, 4.9667, 10.3428, 8.8158, 10.4717, 23.6639, 25.3198]
    actual_predicted = model.predicted
    expected_predicted.each_with_index do |value, idx|
      assert_in_delta(value, actual_predicted[idx], 0.001)
    end

    expected_residuals = [nil, 1.2142, -2.0989, 1.7566, -1.29085, 2.033, -2.3428, 0.18414, -0.47177, -3.66395, 4.6801]
    actual_residuals = model.residuals
    expected_residuals.each_with_index do |value, idx|
      assert_in_delta(value, actual_residuals[idx], 0.001)
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require File.dirname(__FILE__)+'/../lib/statsample'
|
2
|
+
require 'test/unit'
|
3
|
+
|
4
|
+
# Tests for Statsample::Reliability (item analysis and item characteristic
# curves) over a four-item scale.
class StatsampleReliabilityTestCase < Test::Unit::TestCase
  # Builds four scale vectors of twelve cases each and joins them into @ds.
  def initialize(*args)
    super
    @x1 = [1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 30].to_vector(:scale)
    @x2 = [1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 50].to_vector(:scale)
    @x3 = [2, 2, 1, 1, 1, 2, 2, 2, 3, 4, 5, 40].to_vector(:scale)
    @x4 = [1, 2, 3, 4, 4, 4, 4, 3, 4, 4, 5, 30].to_vector(:scale)
    @ds = { 'x1' => @x1, 'x2' => @x2, 'x3' => @x3, 'x4' => @x4 }.to_dataset
  end

  # Alpha, standardized alpha, item-total correlation and the sample
  # variance reported for the scale with item x1 deleted.
  def test_general
    analysis = Statsample::Reliability::ItemAnalysis.new(@ds)
    assert_in_delta(0.980, analysis.alpha, 0.001)
    assert_in_delta(0.999, analysis.alpha_standarized, 0.001)
    assert_in_delta(0.999, analysis.item_total_correlation['x1'], 0.001)
    assert_in_delta(1050.455, analysis.stats_if_deleted['x1'][:variance_sample], 0.001)
  end

  # Only checks that the curve object can be built from the dataset.
  # TODO: the real test still has to be written; curve_field('x1', value)
  # for the values 1, 2, 3 and 30 was inspected by hand while developing.
  def test_icc
    icc = Statsample::Reliability::ItemCharacteristicCurve.new(@ds)
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require File.dirname(__FILE__)+'/../lib/statsample'
|
2
|
+
require 'test/unit'
|
3
|
+
|
4
|
+
# Tests for the random-sample helpers in Statsample::Resample.
class StatsampleResampleTestCase < Test::Unit::TestCase
  def initialize(*args)
    super
  end

  # generate(20, 1, 10) must return 20 values, none below 1 or above 10.
  def test_basic
    sample = Statsample::Resample.generate(20, 1, 10)
    assert_equal(20, sample.size)
    assert(sample.min >= 1)
    assert(sample.max <= 10)
  end

  # repeat_and_save must collect one block result per repetition. Each
  # result here is how many times the value 1 appears in a fresh sample;
  # the number of repetitions with more than three such hits is expected to
  # land between 30 and 70 (a probabilistic bound).
  def test_repeat_and_save
    results = Statsample::Resample.repeat_and_save(400) do
      Statsample::Resample.generate(20, 1, 10).count(1)
    end
    assert_equal(400, results.size)
    counts = Statsample::Vector.new(results, :scale)
    above_three = counts.count { |hits| hits > 3 }
    assert(!(above_three < 30 || above_three > 70))
  end
end
|
data/test/test_srs.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require File.dirname(__FILE__)+'/../lib/statsample'
|
2
|
+
require 'test/unit'
|
3
|
+
|
4
|
+
# Tests for the sample-size and standard-error helpers in Statsample::SRS.
class StatsampleSrsTestCase < Test::Unit::TestCase
  # The sample-size estimations run only when the HAS_GSL flag is set;
  # otherwise a notice is printed. The proportion standard deviation is
  # always checked.
  def test_std_error
    if HAS_GSL
      assert_equal(384, Statsample::SRS.estimation_n0(0.05, 0.5, 0.95).to_i)
      assert_equal(108, Statsample::SRS.estimation_n(0.05, 0.5, 150, 0.95).to_i)
    else
      puts "Statsample::SRS.estimation_n0 not tested (needs ruby-gsl)"
    end

    sd = Statsample::SRS.proportion_sd_kp_wor(0.5, 100, 150)
    assert_in_delta(0.0289, sd, 0.001)
  end
end
|
@@ -0,0 +1,152 @@
|
|
1
|
+
require File.dirname(__FILE__)+'/../lib/statsample'
|
2
|
+
require 'test/unit'
|
3
|
+
|
4
|
+
# Tests for the bivariate statistics, chi-square test, SRS estimators and
# simple regression provided by Statsample.
class StatsampleStatisicsTestCase < Test::Unit::TestCase
  # chi_square must reject non-matrix arguments with TypeError and return
  # roughly 32.53 for the observed/expected matrices below.
  def test_chi_square
    assert_raise TypeError do
      Statsample::Test.chi_square(1, 1)
    end
    real = Matrix[[95, 95], [45, 155]]
    expected = Matrix[[68, 122], [72, 128]]
    # Compute the statistic once and reuse it for the value check.
    chi = nil
    assert_nothing_raised do
      chi = Statsample::Test.chi_square(real, expected)
    end
    assert_in_delta(32.53, chi, 0.1)
  end

  def test_sum_of_codeviated
    v1 = [1, 2, 3, 4, 5, 6].to_vector(:scale)
    v2 = [6, 2, 4, 10, 12, 8].to_vector(:scale)
    assert_equal(23.0, Statsample::Bivariate.sum_of_codeviated(v1, v2))
  end

  # The second pair of vectors holds the same eight complete pairs as the
  # first plus cases where one side is nil, and must give the same r.
  def test_pearson
    v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:scale)
    v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:scale)
    assert_in_delta(0.525, Statsample::Bivariate.pearson(v1, v2), 0.001)
    v3 = [6, 2, 1000, 1000, 5, 4, 7, 8, 4, 3, 2, nil].to_vector(:scale)
    v4 = [2, nil, nil, nil, 3, 7, 8, 6, 4, 3, 2, 500].to_vector(:scale)
    assert_in_delta(0.525, Statsample::Bivariate.pearson(v3, v4), 0.001)
  end

  # Every cell of correlation_matrix must match pearson computed directly on
  # the corresponding pair of vectors.
  def test_matrix_correlation
    v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:scale)
    v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:scale)
    v3 = [6, 2, 1000, 1000, 5, 4, 7, 8].to_vector(:scale)
    v4 = [2, nil, nil, nil, 3, 7, 8, 6].to_vector(:scale)
    vectors = [v1, v2, v3, v4]
    ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4 }.to_dataset
    # Row i, column j holds pearson(vectors[i], vectors[j]).
    expected = Matrix.rows(
      vectors.map do |row_vector|
        vectors.map { |col_vector| Statsample::Bivariate.pearson(row_vector, col_vector) }
      end
    )
    obt = Statsample::Bivariate.correlation_matrix(ds)
    vectors.each_index do |i|
      vectors.each_index do |j|
        assert_in_delta(expected[i, j], obt[i, j], 0.0001, "#{expected[i,j].class}!=#{obt[i,j].class} ")
      end
    end
  end

  def test_prop_pearson
    if HAS_GSL
      assert_in_delta(0.42, Statsample::Bivariate.prop_pearson(Statsample::Bivariate.t_r(0.084, 94), 94), 0.01)
      assert_in_delta(0.65, Statsample::Bivariate.prop_pearson(Statsample::Bivariate.t_r(0.046, 95), 95), 0.01)
    else
      puts "Bivariate.prop_pearson not tested (no ruby-gsl)"
    end
  end

  # covariance and covariance_slow must agree.
  def test_covariance
    if HAS_GSL
      v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:scale)
      v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:scale)
      assert_in_delta(Statsample::Bivariate.covariance(v1, v2), Statsample::Bivariate.covariance_slow(v1, v2), 0.001)
    else
      # Report the skip like the other GSL-gated tests in this class do.
      puts "Bivariate.covariance not tested (no ruby-gsl)"
    end
  end

  def test_spearman
    v1 = [86, 97, 99, 100, 101, 103, 106, 110, 112, 113].to_vector(:scale)
    v2 = [0, 20, 28, 27, 50, 29, 7, 17, 6, 12].to_vector(:scale)
    assert_in_delta(-0.175758, Statsample::Bivariate.spearman(v1, v2), 0.0001)
  end

  # point_biserial must raise TypeError for the (c, d) argument order and
  # equal pearson for the (d, c) order, where d holds only 0 and 1.
  def test_point_biserial
    c = [1, 3, 5, 6, 7, 100, 200, 300, 400, 300].to_vector(:scale)
    d = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0].to_vector(:scale)
    assert_raise TypeError do
      Statsample::Bivariate.point_biserial(c, d)
    end
    assert_in_delta(Statsample::Bivariate.point_biserial(d, c), Statsample::Bivariate.pearson(d, c), 0.0001)
  end

  # tau_a works on the vectors directly; tau_b on their crosstab matrix.
  def test_tau
    v1 = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11].to_vector(:ordinal)
    v2 = [1, 3, 4, 5, 7, 8, 2, 9, 10, 6, 11].to_vector(:ordinal)
    assert_in_delta(0.6727, Statsample::Bivariate.tau_a(v1, v2), 0.001)
    assert_in_delta(0.6727, Statsample::Bivariate.tau_b(Statsample::Crosstab.new(v1, v2).to_matrix), 0.001)
    v1 = [12, 14, 14, 17, 19, 19, 19, 19, 19, 20, 21, 21, 21, 21, 21, 22, 23, 24, 24, 24, 26, 26, 27].to_vector(:ordinal)
    v2 = [11, 4, 4, 2, 0, 0, 0, 0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0].to_vector(:ordinal)
    assert_in_delta(-0.376201540231705, Statsample::Bivariate.tau_b(Statsample::Crosstab.new(v1, v2).to_matrix), 0.001)
  end

  def test_gamma
    m = Matrix[[10, 5, 2], [10, 15, 20]]
    assert_in_delta(0.636, Statsample::Bivariate.gamma(m), 0.001)
    m2 = Matrix[[15, 12, 6, 5], [12, 8, 10, 8], [4, 6, 9, 10]]
    assert_in_delta(0.349, Statsample::Bivariate.gamma(m2), 0.001)
  end

  # Checks the size and sum of the fixture, then calls the confidence
  # interval estimator when GSL is available.
  def test_estimation_mean
    v = ([42] * 23 + [41] * 4 + [36] * 1 + [32] * 1 + [29] * 1 + [27] * 2 + [23] * 1 + [19] * 1 + [16] * 2 + [15] * 2 + [14, 11, 10, 9, 7] + [6] * 3 + [5] * 2 + [4, 3]).to_vector(:scale)
    assert_equal(50, v.size)
    assert_equal(1471, v.sum)
    if HAS_GSL
      # NOTE(review): smoke test only -- the returned limits are not checked
      # because no reference values are recorded here.
      Statsample::SRS.mean_confidence_interval_z(v.mean, v.sds, v.size, 676, 0.80)
    else
      puts "SRS.mean_confidence_interval_z not tested (no ruby-gsl)"
    end
  end

  def test_estimation_proportion
    # total
    pop = 3042
    sam = 200
    prop = 0.19
    assert_in_delta(81.8, Statsample::SRS.proportion_total_sd_ep_wor(prop, sam, pop), 0.1)

    # confidence limits
    pop = 500
    sam = 100
    prop = 0.37
    a = 0.95
    if HAS_GSL
      l = Statsample::SRS.proportion_confidence_interval_z(prop, sam, pop, a)
      assert_in_delta(0.28, l[0], 0.01)
      assert_in_delta(0.46, l[1], 0.01)
    else
      puts "SRS.proportion_confidence_interval_z not tested (no ruby-gsl)"
    end
  end

  # ssr + sse must add up to sst, and r must match the Pearson correlation
  # of the two vectors.
  def test_simple_linear_regression
    a = [1, 2, 3, 4, 5, 6].to_vector(:scale)
    b = [6, 2, 4, 10, 12, 8].to_vector(:scale)
    reg = Statsample::Regression::SimpleRegression.new_from_vectors(a, b)
    assert_in_delta((reg.ssr + reg.sse).to_f, reg.sst, 0.001)
    assert_in_delta(Statsample::Bivariate.pearson(a, b), reg.r, 0.001)
    assert_in_delta(2.4, reg.a, 0.01)
    assert_in_delta(1.314, reg.b, 0.001)
    assert_in_delta(0.657, reg.r, 0.001)
    assert_in_delta(0.432, reg.r2, 0.001)
  end

  # Disabled: the "a_" prefix keeps Test::Unit from collecting this method.
  # It only builds the regression object and asserts nothing.
  def a_test_multiple_regression
    x1 = [1, 2, 3, 4, 5, 6].to_vector(:scale)
    x2 = [3, 5, 8, 9, 10, 20].to_vector(:scale)
    x3 = [100, 90, 50, 30, 50, 10].to_vector(:scale)
    y = [6, 2, 4, 10, 12, 8].to_vector(:scale)
    Statsample::Regression::MultipleRegression.new_from_vectors([x1, x2, x3], y)
  end
end
|