statsample 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. data/History.txt +79 -0
  2. data/Manifest.txt +56 -0
  3. data/README.txt +77 -0
  4. data/Rakefile +22 -0
  5. data/bin/statsample +2 -0
  6. data/demo/benchmark.rb +52 -0
  7. data/demo/chi-square.rb +44 -0
  8. data/demo/dice.rb +13 -0
  9. data/demo/distribution_t.rb +95 -0
  10. data/demo/graph.rb +9 -0
  11. data/demo/item_analysis.rb +30 -0
  12. data/demo/mean.rb +81 -0
  13. data/demo/proportion.rb +57 -0
  14. data/demo/sample_test.csv +113 -0
  15. data/demo/strata_proportion.rb +152 -0
  16. data/demo/stratum.rb +141 -0
  17. data/lib/spss.rb +131 -0
  18. data/lib/statsample.rb +216 -0
  19. data/lib/statsample/anova.rb +74 -0
  20. data/lib/statsample/bivariate.rb +255 -0
  21. data/lib/statsample/chidistribution.rb +39 -0
  22. data/lib/statsample/codification.rb +120 -0
  23. data/lib/statsample/converters.rb +338 -0
  24. data/lib/statsample/crosstab.rb +122 -0
  25. data/lib/statsample/dataset.rb +526 -0
  26. data/lib/statsample/dominanceanalysis.rb +259 -0
  27. data/lib/statsample/dominanceanalysis/bootstrap.rb +126 -0
  28. data/lib/statsample/graph/gdchart.rb +45 -0
  29. data/lib/statsample/graph/svgboxplot.rb +108 -0
  30. data/lib/statsample/graph/svggraph.rb +181 -0
  31. data/lib/statsample/graph/svghistogram.rb +208 -0
  32. data/lib/statsample/graph/svgscatterplot.rb +111 -0
  33. data/lib/statsample/htmlreport.rb +232 -0
  34. data/lib/statsample/multiset.rb +281 -0
  35. data/lib/statsample/regression.rb +522 -0
  36. data/lib/statsample/reliability.rb +235 -0
  37. data/lib/statsample/resample.rb +20 -0
  38. data/lib/statsample/srs.rb +159 -0
  39. data/lib/statsample/test.rb +25 -0
  40. data/lib/statsample/vector.rb +759 -0
  41. data/test/_test_chart.rb +58 -0
  42. data/test/test_anova.rb +31 -0
  43. data/test/test_codification.rb +59 -0
  44. data/test/test_crosstab.rb +55 -0
  45. data/test/test_csv.csv +7 -0
  46. data/test/test_csv.rb +27 -0
  47. data/test/test_dataset.rb +293 -0
  48. data/test/test_ggobi.rb +42 -0
  49. data/test/test_multiset.rb +98 -0
  50. data/test/test_regression.rb +108 -0
  51. data/test/test_reliability.rb +32 -0
  52. data/test/test_resample.rb +23 -0
  53. data/test/test_srs.rb +14 -0
  54. data/test/test_statistics.rb +152 -0
  55. data/test/test_stratified.rb +19 -0
  56. data/test/test_svg_graph.rb +63 -0
  57. data/test/test_vector.rb +265 -0
  58. data/test/test_xls.rb +32 -0
  59. metadata +158 -0
@@ -0,0 +1,42 @@
1
+ require File.dirname(__FILE__)+'/../lib/statsample'
2
+ require 'statsample/multiset'
3
+ require 'test/unit'
4
+
5
# Tests for the Statsample::GGobi helpers exercised below
# (values_definition, variable_definition and out).
class StatsampleGGobiTestCase < Test::Unit::TestCase
  # Builds the shared fixture: one scale, one nominal (with labels for "a"
  # and "d" only) and one ordinal vector, combined into the dataset @ds.
  # Uses the framework's setup hook rather than overriding the constructor.
  def setup
    v1 = ([10.2, 20.3, 10, 20, 30, 40, 30, 20, 30, 40] * 10).to_vector(:scale)
    @v2 = (%w{a b c a a a b b c d} * 10).to_vector(:nominal)
    @v2.labels = {"a" => "letter a", "d" => "letter d"}
    v3 = ([1, 2, 3, 4, 5, 4, 3, 2, 1, 2] * 10).to_vector(:ordinal)
    @ds = {'v1' => v1, 'v2' => @v2, 'v3' => v3}.to_dataset
  end

  # A float, an integer and a string are each wrapped in their own tag.
  def test_values_definition
    a = [1.0, 2, "a"]
    assert_equal("<real>1.0</real> <int>2</int> <string>a</string>", Statsample::GGobi.values_definition(a))
  end

  # A nominal vector is rendered as a <categoricalvariable> element; the
  # carrier object collects the value->code conversions and the list of
  # categorical variable names as a side effect.
  def test_variable_definition
    carrier = OpenStruct.new
    carrier.categorials = []
    carrier.conversions = {}
    real_var_definition = Statsample::GGobi.variable_definition(carrier, @v2, 'variable 2', "v2")
    # The heredoc body is kept flush-left on purpose: the comparison below
    # only turns every whitespace character into a space (it does not
    # collapse runs), so added indentation would change the expected string.
    expected = <<EOS
<categoricalvariable name="variable 2" nickname="v2">
<levels count="4">
<level value="1">letter a</level>
<level value="2">b</level>
<level value="3">c</level>
<level value="4">letter d</level></levels>
</categoricalvariable>
EOS
    assert_equal(expected.gsub(/\s/, " "), real_var_definition.gsub(/\s/, " "))
    assert_equal({'variable 2' => {'a' => 1, 'b' => 2, 'c' => 3, 'd' => 4}}, carrier.conversions)
    assert_equal(['variable 2'], carrier.categorials)
  end

  # Smoke test: exporting the whole dataset must not raise.
  # NOTE(review): the return value of GGobi.out is not checked here because
  # its expected content is not specified anywhere in this test file.
  def test_out
    assert_nothing_raised do
      Statsample::GGobi.out(@ds)
    end
  end
end
@@ -0,0 +1,98 @@
1
+ require File.dirname(__FILE__)+'/../lib/statsample.rb'
2
+ require 'statsample/multiset'
3
+ require 'test/unit'
4
+
5
# Tests for Statsample::Multiset (a named collection of datasets sharing the
# same fields) and for Statsample::StratifiedSample built on top of it.
class StatsampleMultisetTestCase < Test::Unit::TestCase
  # Datasets are stored and retrieved by name; a dataset whose fields differ
  # from the multiset's fields is expected to be rejected.
  def test_creation
    v1a = [1, 2, 3, 4, 5].to_vector
    v2a = [11, 21, 31, 41, 51].to_vector
    v3a = [21, 23, 34, 45, 56].to_vector
    ds1 = {'v1' => v1a, 'v2' => v2a, 'v3' => v3a}.to_dataset
    v1b = [15, 25, 35, 45, 55].to_vector
    v2b = [11, 21, 31, 41, 51].to_vector
    v3b = [21, 23, 34, 45, 56].to_vector
    ds2 = {'v1' => v1b, 'v2' => v2b, 'v3' => v3b}.to_dataset
    ms = Statsample::Multiset.new(['v1', 'v2', 'v3'])
    ms.add_dataset('ds1', ds1)
    ms.add_dataset('ds2', ds2)
    assert_equal(ds1, ms['ds1'])
    assert_equal(ds2, ms['ds2'])
    assert_equal(v1a, ms['ds1']['v1'])
    assert_not_equal(v1b, ms['ds1']['v1'])
    ds3 = {'v1' => v1b, 'v2' => v2b}.to_dataset
    # Pass the dataset name as in the calls above; with a single argument the
    # ArgumentError would come from the wrong arity, not from the missing
    # 'v3' field this test is meant to exercise.
    assert_raise ArgumentError do
      ms.add_dataset('ds3', ds3)
    end
  end

  # new_empty_vectors must be equivalent to adding empty datasets by hand.
  def test_creation_empty
    ms = Statsample::Multiset.new_empty_vectors(%w{id age name}, %w{male female})
    ds_male = {'id' => [].to_vector, 'age' => [].to_vector, 'name' => [].to_vector}.to_dataset(%w{id age name})
    ds_female = {'id' => [].to_vector, 'age' => [].to_vector, 'name' => [].to_vector}.to_dataset(%w{id age name})
    ms2 = Statsample::Multiset.new(%w{id age name})
    ms2.add_dataset('male', ds_male)
    ms2.add_dataset('female', ds_female)
    assert_equal(ms2.fields, ms.fields)
    assert_equal(ms2['male'], ms['male'])
    assert_equal(ms2['female'], ms['female'])
  end

  # Splitting by a single field yields one dataset per distinct value.
  def test_to_multiset_by_split_one
    sex = %w{m m m m m f f f f m}.to_vector(:nominal)
    city = %w{London Paris NY London Paris NY London Paris NY Tome}.to_vector(:nominal)
    age = [10, 10, 20, 30, 34, 34, 33, 35, 36, 40].to_vector(:scale)
    ds = {'sex' => sex, 'city' => city, 'age' => age}.to_dataset
    ms = ds.to_multiset_by_split('sex')
    assert_equal(2, ms.n_datasets)
    assert_equal(%w{f m}, ms.datasets.keys.sort)
    assert_equal(6, ms['m'].cases)
    assert_equal(4, ms['f'].cases)
    assert_equal(%w{London Paris NY London Paris Tome}, ms['m']['city'].to_a)
    assert_equal([34, 33, 35, 36], ms['f']['age'].to_a)
  end

  # Splitting by several fields keys each dataset by the array of values.
  def test_to_multiset_by_split_multiple
    sex = %w{m m m m m m m m m m f f f f f f f f f f}.to_vector(:nominal)
    city = %w{London London London Paris Paris London London London Paris Paris London London London Paris Paris London London London Paris Paris}.to_vector(:nominal)
    hair = %w{blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black}.to_vector(:nominal)
    age = [10, 10, 20, 30, 34, 34, 33, 35, 36, 40, 10, 10, 20, 30, 34, 34, 33, 35, 36, 40].to_vector(:scale)
    ds = {'sex' => sex, 'city' => city, 'hair' => hair, 'age' => age}.to_dataset(%w{sex city hair age})
    ms = ds.to_multiset_by_split('sex', 'city', 'hair')
    assert_equal(8, ms.n_datasets)
    assert_equal(3, ms[%w{m London blonde}].cases)
    # Rows 3, 7 and 8 of the fixture are male / London / black.
    assert_equal(3, ms[%w{m London black}].cases)
    assert_equal(1, ms[%w{m Paris black}].cases)
  end

  # Proportion estimated over two strata with population sizes 50 and 100.
  def test_stratum_proportion
    ds1 = {'q1' => [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0].to_vector}.to_dataset
    ds2 = {'q1' => [1, 1, 1, 1, 1, 1, 1, 0, 0].to_vector}.to_dataset
    assert_equal(5.0 / 12, ds1['q1'].proportion)
    assert_equal(7.0 / 9, ds2['q1'].proportion)
    ms = Statsample::Multiset.new(['q1'])
    ms.add_dataset('d1', ds1)
    ms.add_dataset('d2', ds2)
    ss = Statsample::StratifiedSample.new(ms, {'d1' => 50, 'd2' => 100})
    assert_in_delta(0.655, ss.proportion('q1'), 0.01)
    assert_in_delta(0.345, ss.proportion('q1', 0), 0.01)
  end

  # Size, mean and standard error for two equally sized strata.
  def test_stratum_scale
    boys = {'test' => [50, 55, 60, 62, 62, 65, 67, 67, 70, 70, 73, 73, 75, 78, 78, 80, 85, 90].to_vector(:scale)}.to_dataset
    girls = {'test' => [70, 70, 72, 72, 75, 75, 78, 78, 80, 80, 82, 82, 85, 85, 88, 88, 90, 90].to_vector(:scale)}.to_dataset
    ms = Statsample::Multiset.new(['test'])
    ms.add_dataset('boys', boys)
    ms.add_dataset('girls', girls)
    ss = Statsample::StratifiedSample.new(ms, {'boys' => 10000, 'girls' => 10000})
    assert_equal(2, ss.strata_number)
    assert_equal(20000, ss.population_size)
    assert_equal(10000, ss.stratum_size('boys'))
    assert_equal(10000, ss.stratum_size('girls'))
    assert_equal(36, ss.sample_size)
    assert_equal(75, ss.mean('test'))
    assert_in_delta(1.45, ss.standard_error_wor('test'), 0.01)
    # Both standard-error implementations must agree.
    assert_in_delta(ss.standard_error_wor('test'), ss.standard_error_wor_2('test'), 0.00001)
  end
end
@@ -0,0 +1,108 @@
1
+ require File.dirname(__FILE__)+'/../lib/statsample'
2
+ require 'test/unit'
3
# Tests for simple and multiple regression under Statsample::Regression.
class StatsampleRegressionTestCase < Test::Unit::TestCase
  # Prepares a five-point simple regression used by test_parameters.
  def initialize(*args)
    @x = [13, 20, 10, 33, 15].to_vector(:scale)
    @y = [23, 18, 35, 10, 27].to_vector(:scale)
    @reg = Statsample::Regression::SimpleRegression.new_from_vectors(@x, @y)
    super
  end

  # Intercept, slope and standard error of the simple regression fixture.
  def test_parameters
    assert_in_delta(40.009, @reg.a, 0.001)
    assert_in_delta(-0.957, @reg.b, 0.001)
    assert_in_delta(4.248, @reg.standard_error, 0.002)
  end

  # Pairwise multiple regression on vectors with scattered missing values.
  def test_multiple_regression_pairwise_2
    a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 3, nil, 3, nil, 3].to_vector(:scale)
    b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4, 2, 2, nil, 6, 2].to_vector(:scale)
    c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100, nil, 3, 7, nil, 7].to_vector(:scale)
    y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 30, 40, nil, 50, nil].to_vector(:scale)
    dataset = {'a' => a, 'b' => b, 'c' => c, 'y' => y}.to_dataset
    model = Statsample::Regression::MultipleRegressionPairwise.new(dataset, 'y')
    assert_in_delta(2407.436, model.sst, 0.001)
    assert_in_delta(0.752, model.r, 0.001)
    assert_in_delta(0.565, model.r2, 0.001)
    assert_in_delta(1361.130, model.ssr, 0.001)
    assert_in_delta(1046.306, model.sse, 0.001)
    assert_in_delta(3.035, model.f, 0.001)
  end

  # Alglib-backed multiple regression; reports a message and does nothing
  # else when the HAS_ALGIB constant is false.
  def test_multiple_regression_alglib
    unless HAS_ALGIB
      puts "Regression::MultipleRegressionAlglib not tested (no Alglib)"
      return
    end
    a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7].to_vector(:scale)
    b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4].to_vector(:scale)
    c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100].to_vector(:scale)
    y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30].to_vector(:scale)
    dataset = {'a' => a, 'b' => b, 'c' => c, 'y' => y}.to_dataset
    model = Statsample::Regression::MultipleRegressionAlglib.new(dataset, 'y')
    model_test(model)
    expected_predicted = [1.7857, 6.0989, 3.2433, 7.2908, 4.9667, 10.3428, 8.8158, 10.4717, 23.6639, 25.3198]
    actual_predicted = model.predicted
    expected_predicted.each_with_index do |value, i|
      assert_in_delta(value, actual_predicted[i], 0.001)
    end
    expected_residuals = [1.2142, -2.0989, 1.7566, -1.29085, 2.033, -2.3428, 0.18414, -0.47177, -3.66395, 4.6801]
    actual_residuals = model.residuals
    expected_residuals.each_with_index do |value, i|
      assert_in_delta(value, actual_residuals[i], 0.001)
    end
  end

  # Shared assertions for a fitted model of y on a, b and c. Not a test on
  # its own: it is called from the engine-specific tests with their model.
  def model_test(lr)
    assert_in_delta(0.695, lr.coeffs['a'], 0.001)
    assert_in_delta(11.027, lr.constant, 0.001)
    assert_in_delta(1.785, lr.process([1, 3, 11]), 0.001)

    expected_std_coeffs = {'a' => 0.151, 'b' => -0.547, 'c' => 0.997}
    actual_std_coeffs = lr.standarized_coeffs
    expected_std_coeffs.each do |name, value|
      assert_in_delta(value, actual_std_coeffs[name], 0.001)
    end
    assert_in_delta(639.6, lr.sst, 0.001)
    assert_in_delta(583.76, lr.ssr, 0.001)
    assert_in_delta(55.840, lr.sse, 0.001)
    assert_in_delta(0.955, lr.r, 0.001)
    assert_in_delta(0.913, lr.r2, 0.001)
    assert_in_delta(20.908, lr.f, 0.001)
    if HAS_GSL
      assert_in_delta(0.001, lr.significance, 0.001)
    else
      puts "#{lr.class}#significance not tested (not GSL)"
    end
    assert_in_delta(0.226, lr.tolerance("a"), 0.001)
    expected_se = {"a" => 1.171, "b" => 1.129, "c" => 0.072}
    actual_se = lr.coeffs_se
    expected_se.each do |name, value|
      assert_in_delta(value, actual_se[name], 0.001)
    end
    expected_t = {"a" => 0.594, "b" => -3.796, "c" => 3.703}
    actual_t = lr.coeffs_t
    expected_t.each do |name, value|
      assert_in_delta(value, actual_t[name], 0.001)
    end
    assert_in_delta(4.559, lr.constant_se, 0.001)
    assert_in_delta(2.419, lr.constant_t, 0.001)
  end

  # Same data as the Alglib test with a leading all-nil case, fitted with the
  # pairwise engine; the nil case is expected at index 0 of the outputs.
  def test_regression_pairwise
    a = [nil, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7].to_vector(:scale)
    b = [nil, 3, 3, 4, 4, 5, 5, 6, 6, 4, 4].to_vector(:scale)
    c = [nil, 11, 22, 30, 40, 50, 65, 78, 79, 99, 100].to_vector(:scale)
    y = [nil, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30].to_vector(:scale)
    dataset = {'a' => a, 'b' => b, 'c' => c, 'y' => y}.to_dataset
    model = Statsample::Regression::MultipleRegressionPairwise.new(dataset, 'y')
    model_test(model)
    expected_predicted = [nil, 1.7857, 6.0989, 3.2433, 7.2908, 4.9667, 10.3428, 8.8158, 10.4717, 23.6639, 25.3198]
    actual_predicted = model.predicted
    expected_predicted.each_with_index do |value, i|
      assert_in_delta(value, actual_predicted[i], 0.001)
    end
    expected_residuals = [nil, 1.2142, -2.0989, 1.7566, -1.29085, 2.033, -2.3428, 0.18414, -0.47177, -3.66395, 4.6801]
    actual_residuals = model.residuals
    expected_residuals.each_with_index do |value, i|
      assert_in_delta(value, actual_residuals[i], 0.001)
    end
  end
end
@@ -0,0 +1,32 @@
1
+ require File.dirname(__FILE__)+'/../lib/statsample'
2
+ require 'test/unit'
3
+
4
# Tests for the item-analysis classes under Statsample::Reliability.
class StatsampleReliabilityTestCase < Test::Unit::TestCase
  # Four scale items of twelve cases each; the last case is an extreme value
  # on every item.
  def initialize(*args)
    super
    @x1 = [1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 30].to_vector(:scale)
    @x2 = [1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 50].to_vector(:scale)
    @x3 = [2, 2, 1, 1, 1, 2, 2, 2, 3, 4, 5, 40].to_vector(:scale)
    @x4 = [1, 2, 3, 4, 4, 4, 4, 3, 4, 4, 5, 30].to_vector(:scale)
    @ds = {'x1' => @x1, 'x2' => @x2, 'x3' => @x3, 'x4' => @x4}.to_dataset
  end

  # Alpha coefficients plus the per-item statistics reported for 'x1'.
  def test_general
    analysis = Statsample::Reliability::ItemAnalysis.new(@ds)
    assert_in_delta(0.980, analysis.alpha, 0.001)
    assert_in_delta(0.999, analysis.alpha_standarized, 0.001)
    x1_total_correlation = analysis.item_total_correlation['x1']
    assert_in_delta(0.999, x1_total_correlation, 0.001)
    x1_if_deleted = analysis.stats_if_deleted['x1']
    assert_in_delta(1050.455, x1_if_deleted[:variance_sample], 0.001)
  end

  # Only checks that the curve object can be built from the dataset.
  # TODO: real assertions are still missing; the commented lines are the
  # exploratory calls left by the author.
  def test_icc
    # p @x1.factors
    Statsample::Reliability::ItemCharacteristicCurve.new(@ds)
    # p icc.curve_field('x1',1).sort
    # p icc.curve_field('x1',2).sort
    # p icc.curve_field('x1',3).sort
    # p icc.curve_field('x1',30).sort
  end
end
@@ -0,0 +1,23 @@
1
+ require File.dirname(__FILE__)+'/../lib/statsample'
2
+ require 'test/unit'
3
+
4
# Tests for the random resampling helpers in Statsample::Resample.
class StatsampleResampleTestCase < Test::Unit::TestCase
  # No fixture needed; simply defers to the framework constructor.
  def initialize(*args)
    super
  end

  # generate(20,1,10) must return 20 values, none below 1 or above 10.
  def test_basic
    sample = Statsample::Resample.generate(20, 1, 10)
    assert_equal(20, sample.size)
    assert(sample.min >= 1)
    assert(sample.max <= 10)
  end

  # Runs the block 400 times, each time counting how often the value 1
  # appears in a fresh sample, then checks how many runs saw it more than
  # three times.
  # NOTE(review): this is a statistical check on random data, so it can
  # fail occasionally without any code defect.
  def test_repeat_and_save
    ones_per_run = Statsample::Resample.repeat_and_save(400) do
      Statsample::Resample.generate(20, 1, 10).count(1)
    end
    assert_equal(400, ones_per_run.size)
    counts = Statsample::Vector.new(ones_per_run, :scale)
    runs_above_three = counts.count { |ones| ones > 3 }
    assert(runs_above_three.between?(30, 70))
  end
end
data/test/test_srs.rb ADDED
@@ -0,0 +1,14 @@
1
+ require File.dirname(__FILE__)+'/../lib/statsample'
2
+ require 'test/unit'
3
+
4
# Tests for the simple-random-sampling formulas in Statsample::SRS.
class StatsampleSrsTestCase < Test::Unit::TestCase
  # Sample-size estimation (checked only when HAS_GSL is true) followed by
  # the standard deviation of a proportion.
  def test_std_error
    if HAS_GSL
      n0 = Statsample::SRS.estimation_n0(0.05, 0.5, 0.95)
      assert_equal(384, n0.to_i)
      n = Statsample::SRS.estimation_n(0.05, 0.5, 150, 0.95)
      assert_equal(108, n.to_i)
    else
      puts "Statsample::SRS.estimation_n0 not tested (needs ruby-gsl)"
    end
    proportion_sd = Statsample::SRS.proportion_sd_kp_wor(0.5, 100, 150)
    assert_in_delta(0.0289, proportion_sd, 0.001)
  end
end
@@ -0,0 +1,152 @@
1
+ require File.dirname(__FILE__)+'/../lib/statsample'
2
+ require 'test/unit'
3
+
4
# Tests for the bivariate statistics, chi-square test, SRS estimators and
# simple regression provided by Statsample.
class StatsampleStatisicsTestCase < Test::Unit::TestCase
  # chi_square rejects non-matrix arguments and returns the statistic for an
  # observed/expected pair of matrices.
  def test_chi_square
    assert_raise TypeError do
      Statsample::Test.chi_square(1, 1)
    end
    real = Matrix[[95, 95], [45, 155]]
    expected = Matrix[[68, 122], [72, 128]]
    # Compute once: the same call both proves nothing is raised and supplies
    # the value checked below.
    chi = nil
    assert_nothing_raised do
      chi = Statsample::Test.chi_square(real, expected)
    end
    assert_in_delta(32.53, chi, 0.1)
  end

  def test_sum_of_codeviated
    v1 = [1, 2, 3, 4, 5, 6].to_vector(:scale)
    v2 = [6, 2, 4, 10, 12, 8].to_vector(:scale)
    assert_equal(23.0, Statsample::Bivariate.sum_of_codeviated(v1, v2))
  end

  # The second pair adds cases with a nil on one side; the expected
  # correlation is the same as for the complete pair.
  def test_pearson
    v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:scale)
    v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:scale)
    assert_in_delta(0.525, Statsample::Bivariate.pearson(v1, v2), 0.001)
    v3 = [6, 2, 1000, 1000, 5, 4, 7, 8, 4, 3, 2, nil].to_vector(:scale)
    v4 = [2, nil, nil, nil, 3, 7, 8, 6, 4, 3, 2, 500].to_vector(:scale)
    assert_in_delta(0.525, Statsample::Bivariate.pearson(v3, v4), 0.001)
  end

  # Every cell of correlation_matrix must equal the pairwise pearson value
  # of the corresponding vectors.
  def test_matrix_correlation
    v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:scale)
    v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:scale)
    v3 = [6, 2, 1000, 1000, 5, 4, 7, 8].to_vector(:scale)
    v4 = [2, nil, nil, nil, 3, 7, 8, 6].to_vector(:scale)
    ds = {'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4}.to_dataset
    vectors = [v1, v2, v3, v4]
    # Expected values as nested arrays (row i, column j), iterated directly so
    # the test does not depend on Matrix size accessors.
    expected = vectors.map do |row_vector|
      vectors.map { |col_vector| Statsample::Bivariate.pearson(row_vector, col_vector) }
    end
    obt = Statsample::Bivariate.correlation_matrix(ds)
    expected.each_with_index do |row, i|
      row.each_with_index do |value, j|
        assert_in_delta(value, obt[i, j], 0.0001, "#{value.class}!=#{obt[i, j].class} ")
      end
    end
  end

  def test_prop_pearson
    if HAS_GSL
      assert_in_delta(0.42, Statsample::Bivariate.prop_pearson(Statsample::Bivariate.t_r(0.084, 94), 94), 0.01)
      assert_in_delta(0.65, Statsample::Bivariate.prop_pearson(Statsample::Bivariate.t_r(0.046, 95), 95), 0.01)
    else
      puts "Bivariate.prop_pearson not tested (no ruby-gsl)"
    end
  end

  # covariance and covariance_slow must agree; only checked when HAS_GSL is
  # true, with a skip message otherwise like the other GSL-guarded tests.
  def test_covariance
    if HAS_GSL
      v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:scale)
      v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:scale)
      assert_in_delta(Statsample::Bivariate.covariance(v1, v2), Statsample::Bivariate.covariance_slow(v1, v2), 0.001)
    else
      puts "Bivariate.covariance not tested (no ruby-gsl)"
    end
  end

  def test_spearman
    v1 = [86, 97, 99, 100, 101, 103, 106, 110, 112, 113].to_vector(:scale)
    v2 = [0, 20, 28, 27, 50, 29, 7, 17, 6, 12].to_vector(:scale)
    assert_in_delta(-0.175758, Statsample::Bivariate.spearman(v1, v2), 0.0001)
  end

  # point_biserial raises TypeError for the (c, d) argument order and matches
  # pearson for (d, c).
  def test_point_biserial
    c = [1, 3, 5, 6, 7, 100, 200, 300, 400, 300].to_vector(:scale)
    d = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0].to_vector(:scale)
    assert_raise TypeError do
      Statsample::Bivariate.point_biserial(c, d)
    end
    assert_in_delta(Statsample::Bivariate.point_biserial(d, c), Statsample::Bivariate.pearson(d, c), 0.0001)
  end

  def test_tau
    v1 = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11].to_vector(:ordinal)
    v2 = [1, 3, 4, 5, 7, 8, 2, 9, 10, 6, 11].to_vector(:ordinal)
    assert_in_delta(0.6727, Statsample::Bivariate.tau_a(v1, v2), 0.001)
    assert_in_delta(0.6727, Statsample::Bivariate.tau_b(Statsample::Crosstab.new(v1, v2).to_matrix), 0.001)
    v1 = [12, 14, 14, 17, 19, 19, 19, 19, 19, 20, 21, 21, 21, 21, 21, 22, 23, 24, 24, 24, 26, 26, 27].to_vector(:ordinal)
    v2 = [11, 4, 4, 2, 0, 0, 0, 0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0].to_vector(:ordinal)
    assert_in_delta(-0.376201540231705, Statsample::Bivariate.tau_b(Statsample::Crosstab.new(v1, v2).to_matrix), 0.001)
  end

  def test_gamma
    m = Matrix[[10, 5, 2], [10, 15, 20]]
    assert_in_delta(0.636, Statsample::Bivariate.gamma(m), 0.001)
    m2 = Matrix[[15, 12, 6, 5], [12, 8, 10, 8], [4, 6, 9, 10]]
    assert_in_delta(0.349, Statsample::Bivariate.gamma(m2), 0.001)
  end

  # Checks the fixture itself (50 cases summing 1471) and, when HAS_GSL is
  # true, that the confidence-interval call completes.
  # NOTE(review): no reference limits are asserted; add them once the
  # expected interval is known.
  def test_estimation_mean
    v = ([42] * 23 + [41] * 4 + [36] * 1 + [32] * 1 + [29] * 1 + [27] * 2 + [23] * 1 + [19] * 1 + [16] * 2 + [15] * 2 + [14, 11, 10, 9, 7] + [6] * 3 + [5] * 2 + [4, 3]).to_vector(:scale)
    assert_equal(50, v.size)
    assert_equal(1471, v.sum)
    if HAS_GSL
      Statsample::SRS.mean_confidence_interval_z(v.mean, v.sds, v.size, 676, 0.80)
    else
      puts "SRS.mean_confidence_interval_z not tested (no ruby-gsl)"
    end
  end

  def test_estimation_proportion
    # total
    pop = 3042
    sam = 200
    prop = 0.19
    assert_in_delta(81.8, Statsample::SRS.proportion_total_sd_ep_wor(prop, sam, pop), 0.1)

    # confidence limits
    pop = 500
    sam = 100
    prop = 0.37
    a = 0.95
    if HAS_GSL
      l = Statsample::SRS.proportion_confidence_interval_z(prop, sam, pop, a)
      assert_in_delta(0.28, l[0], 0.01)
      assert_in_delta(0.46, l[1], 0.01)
    else
      puts "SRS.proportion_confidence_interval_z not tested (no ruby-gsl)"
    end
  end

  # Sums of squares must add up, r must match pearson, and the fitted
  # parameters must match the reference values.
  def test_simple_linear_regression
    a = [1, 2, 3, 4, 5, 6].to_vector(:scale)
    b = [6, 2, 4, 10, 12, 8].to_vector(:scale)
    reg = Statsample::Regression::SimpleRegression.new_from_vectors(a, b)
    assert_in_delta((reg.ssr + reg.sse).to_f, reg.sst, 0.001)
    assert_in_delta(Statsample::Bivariate.pearson(a, b), reg.r, 0.001)
    assert_in_delta(2.4, reg.a, 0.01)
    assert_in_delta(1.314, reg.b, 0.001)
    assert_in_delta(0.657, reg.r, 0.001)
    assert_in_delta(0.432, reg.r2, 0.001)
  end

  # Disabled: the name does not start with "test", so the runner does not
  # collect it. Kept as the author left it, without assertions.
  def a_test_multiple_regression
    x1 = [1, 2, 3, 4, 5, 6].to_vector(:scale)
    x2 = [3, 5, 8, 9, 10, 20].to_vector(:scale)
    x3 = [100, 90, 50, 30, 50, 10].to_vector(:scale)
    y = [6, 2, 4, 10, 12, 8].to_vector(:scale)
    reg = Statsample::Regression::MultipleRegression.new_from_vectors([x1, x2, x3], y)
    # p reg
  end
end