statsample 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. data/History.txt +79 -0
  2. data/Manifest.txt +56 -0
  3. data/README.txt +77 -0
  4. data/Rakefile +22 -0
  5. data/bin/statsample +2 -0
  6. data/demo/benchmark.rb +52 -0
  7. data/demo/chi-square.rb +44 -0
  8. data/demo/dice.rb +13 -0
  9. data/demo/distribution_t.rb +95 -0
  10. data/demo/graph.rb +9 -0
  11. data/demo/item_analysis.rb +30 -0
  12. data/demo/mean.rb +81 -0
  13. data/demo/proportion.rb +57 -0
  14. data/demo/sample_test.csv +113 -0
  15. data/demo/strata_proportion.rb +152 -0
  16. data/demo/stratum.rb +141 -0
  17. data/lib/spss.rb +131 -0
  18. data/lib/statsample.rb +216 -0
  19. data/lib/statsample/anova.rb +74 -0
  20. data/lib/statsample/bivariate.rb +255 -0
  21. data/lib/statsample/chidistribution.rb +39 -0
  22. data/lib/statsample/codification.rb +120 -0
  23. data/lib/statsample/converters.rb +338 -0
  24. data/lib/statsample/crosstab.rb +122 -0
  25. data/lib/statsample/dataset.rb +526 -0
  26. data/lib/statsample/dominanceanalysis.rb +259 -0
  27. data/lib/statsample/dominanceanalysis/bootstrap.rb +126 -0
  28. data/lib/statsample/graph/gdchart.rb +45 -0
  29. data/lib/statsample/graph/svgboxplot.rb +108 -0
  30. data/lib/statsample/graph/svggraph.rb +181 -0
  31. data/lib/statsample/graph/svghistogram.rb +208 -0
  32. data/lib/statsample/graph/svgscatterplot.rb +111 -0
  33. data/lib/statsample/htmlreport.rb +232 -0
  34. data/lib/statsample/multiset.rb +281 -0
  35. data/lib/statsample/regression.rb +522 -0
  36. data/lib/statsample/reliability.rb +235 -0
  37. data/lib/statsample/resample.rb +20 -0
  38. data/lib/statsample/srs.rb +159 -0
  39. data/lib/statsample/test.rb +25 -0
  40. data/lib/statsample/vector.rb +759 -0
  41. data/test/_test_chart.rb +58 -0
  42. data/test/test_anova.rb +31 -0
  43. data/test/test_codification.rb +59 -0
  44. data/test/test_crosstab.rb +55 -0
  45. data/test/test_csv.csv +7 -0
  46. data/test/test_csv.rb +27 -0
  47. data/test/test_dataset.rb +293 -0
  48. data/test/test_ggobi.rb +42 -0
  49. data/test/test_multiset.rb +98 -0
  50. data/test/test_regression.rb +108 -0
  51. data/test/test_reliability.rb +32 -0
  52. data/test/test_resample.rb +23 -0
  53. data/test/test_srs.rb +14 -0
  54. data/test/test_statistics.rb +152 -0
  55. data/test/test_stratified.rb +19 -0
  56. data/test/test_svg_graph.rb +63 -0
  57. data/test/test_vector.rb +265 -0
  58. data/test/test_xls.rb +32 -0
  59. metadata +158 -0
@@ -0,0 +1,42 @@
1
+ require File.dirname(__FILE__)+'/../lib/statsample'
2
+ require 'statsample/multiset'
3
+ require 'test/unit'
4
+
5
# OpenStruct is used below as a plain attribute carrier. It is required here so
# this test does not rely on some other file having loaded it already.
require 'ostruct'

# Tests for the Statsample::GGobi XML helper methods.
class StatsampleGGobiTestCase < Test::Unit::TestCase
  # Builds the fixtures before every test: a scale, a nominal and an ordinal
  # vector of 100 values each, bundled into @ds. The nominal vector is also
  # kept as @v2 (with labels for "a" and "d") because test_variable_definition
  # passes it to the library directly.
  def setup
    v1 = ([10.2, 20.3, 10, 20, 30, 40, 30, 20, 30, 40] * 10).to_vector(:scale)
    @v2 = (%w{a b c a a a b b c d} * 10).to_vector(:nominal)
    @v2.labels = {"a" => "letter a", "d" => "letter d"}
    v3 = ([1, 2, 3, 4, 5, 4, 3, 2, 1, 2] * 10).to_vector(:ordinal)
    @ds = {'v1' => v1, 'v2' => @v2, 'v3' => v3}.to_dataset
  end

  # A mixed Float / Integer / String array is rendered with one typed tag per
  # value, separated by single spaces.
  def test_values_definition
    values = [1.0, 2, "a"]
    assert_equal("<real>1.0</real> <int>2</int> <string>a</string>",
                 Statsample::GGobi.values_definition(values))
  end

  # variable_definition must return the <categoricalvariable> XML for @v2 and
  # record the variable and its value => level mapping on the carrier object.
  def test_variable_definition
    carrier = OpenStruct.new
    carrier.categorials = []
    carrier.conversions = {}
    real_var_definition = Statsample::GGobi.variable_definition(carrier, @v2, 'variable 2', "v2")
    # Both sides are compared after replacing every whitespace character with
    # one space (character by character, not collapsed), so the layout of this
    # heredoc is significant and is kept flush left.
    expected = <<EOS
<categoricalvariable name="variable 2" nickname="v2">
<levels count="4">
<level value="1">letter a</level>
<level value="2">b</level>
<level value="3">c</level>
<level value="4">letter d</level></levels>
</categoricalvariable>
EOS
    assert_equal(expected.gsub(/\s/, " "), real_var_definition.gsub(/\s/, " "))
    assert_equal({'variable 2' => {'a' => 1, 'b' => 2, 'c' => 3, 'd' => 4}}, carrier.conversions)
    assert_equal(['variable 2'], carrier.categorials)
  end

  # Smoke test: exporting the whole dataset must not raise. The returned value
  # is not inspected.
  def test_out
    assert_nothing_raised do
      Statsample::GGobi.out(@ds)
    end
  end
end
@@ -0,0 +1,98 @@
1
+ require File.dirname(__FILE__)+'/../lib/statsample.rb'
2
+ require 'statsample/multiset'
3
+ require 'test/unit'
4
+
5
# Tests for Statsample::Multiset, Dataset#to_multiset_by_split and
# Statsample::StratifiedSample.
class StatsampleMultisetTestCase < Test::Unit::TestCase
  # Datasets added under a name are returned unchanged under that name, and a
  # dataset whose fields differ from the multiset's fields is expected to be
  # rejected with ArgumentError.
  def test_creation
    v1a = [1, 2, 3, 4, 5].to_vector
    v2a = [11, 21, 31, 41, 51].to_vector
    v3a = [21, 23, 34, 45, 56].to_vector
    ds1 = {'v1' => v1a, 'v2' => v2a, 'v3' => v3a}.to_dataset
    v1b = [15, 25, 35, 45, 55].to_vector
    v2b = [11, 21, 31, 41, 51].to_vector
    v3b = [21, 23, 34, 45, 56].to_vector
    ds2 = {'v1' => v1b, 'v2' => v2b, 'v3' => v3b}.to_dataset
    ms = Statsample::Multiset.new(['v1', 'v2', 'v3'])
    ms.add_dataset('ds1', ds1)
    ms.add_dataset('ds2', ds2)
    assert_equal(ds1, ms['ds1'])
    assert_equal(ds2, ms['ds2'])
    assert_equal(v1a, ms['ds1']['v1'])
    assert_not_equal(v1b, ms['ds1']['v1'])
    # ds3 lacks 'v3'. It is passed with a name, like the calls above, so that
    # an ArgumentError can only come from the field check and not from calling
    # add_dataset with the wrong number of arguments.
    ds3 = {'v1' => v1b, 'v2' => v2b}.to_dataset
    assert_raise ArgumentError do
      ms.add_dataset('ds3', ds3)
    end
  end

  # new_empty_vectors must produce the same multiset as adding empty datasets
  # by hand.
  def test_creation_empty
    fields = %w{id age name}
    ms = Statsample::Multiset.new_empty_vectors(%w{id age name}, %w{male female})
    ds_male = {'id' => [].to_vector, 'age' => [].to_vector, 'name' => [].to_vector}.to_dataset(fields)
    ds_female = {'id' => [].to_vector, 'age' => [].to_vector, 'name' => [].to_vector}.to_dataset(fields)
    ms2 = Statsample::Multiset.new(fields)
    ms2.add_dataset('male', ds_male)
    ms2.add_dataset('female', ds_female)
    assert_equal(ms2.fields, ms.fields)
    assert_equal(ms2['male'], ms['male'])
    assert_equal(ms2['female'], ms['female'])
  end

  # Splitting by a single field yields one dataset per distinct value, with
  # the cases kept in their original order.
  def test_to_multiset_by_split_one
    sex = %w{m m m m m f f f f m}.to_vector(:nominal)
    city = %w{London Paris NY London Paris NY London Paris NY Tome}.to_vector(:nominal)
    age = [10, 10, 20, 30, 34, 34, 33, 35, 36, 40].to_vector(:scale)
    ds = {'sex' => sex, 'city' => city, 'age' => age}.to_dataset
    ms = ds.to_multiset_by_split('sex')
    assert_equal(2, ms.n_datasets)
    assert_equal(%w{f m}, ms.datasets.keys.sort)
    assert_equal(6, ms['m'].cases)
    assert_equal(4, ms['f'].cases)
    assert_equal(%w{London Paris NY London Paris Tome}, ms['m']['city'].to_a)
    assert_equal([34, 33, 35, 36], ms['f']['age'].to_a)
  end

  # Splitting by several fields yields one dataset per value combination,
  # addressed by the array of values in the order the fields were given.
  def test_to_multiset_by_split_multiple
    sex = %w{m m m m m m m m m m f f f f f f f f f f}.to_vector(:nominal)
    city = %w{London London London Paris Paris London London London Paris Paris London London London Paris Paris London London London Paris Paris}.to_vector(:nominal)
    hair = %w{blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black}.to_vector(:nominal)
    age = [10, 10, 20, 30, 34, 34, 33, 35, 36, 40, 10, 10, 20, 30, 34, 34, 33, 35, 36, 40].to_vector(:scale)
    ds = {'sex' => sex, 'city' => city, 'hair' => hair, 'age' => age}.to_dataset(%w{sex city hair age})
    ms = ds.to_multiset_by_split('sex', 'city', 'hair')
    assert_equal(8, ms.n_datasets)
    # Male rows are the first ten. London+blonde is rows 0, 1, 5;
    # London+black is rows 2, 6, 7; Paris+black is row 3.
    assert_equal(3, ms[%w{m London blonde}].cases)
    assert_equal(3, ms[%w{m London black}].cases)
    assert_equal(1, ms[%w{m Paris black}].cases)
  end

  # Stratified proportion of q1 over two strata with population sizes 50 and
  # 100; the second call asks for the proportion of the value 0.
  def test_stratum_proportion
    ds1 = {'q1' => [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0].to_vector}.to_dataset
    ds2 = {'q1' => [1, 1, 1, 1, 1, 1, 1, 0, 0].to_vector}.to_dataset
    assert_equal(5.0 / 12, ds1['q1'].proportion)
    assert_equal(7.0 / 9, ds2['q1'].proportion)
    ms = Statsample::Multiset.new(['q1'])
    ms.add_dataset('d1', ds1)
    ms.add_dataset('d2', ds2)
    ss = Statsample::StratifiedSample.new(ms, {'d1' => 50, 'd2' => 100})
    assert_in_delta(0.655, ss.proportion('q1'), 0.01)
    assert_in_delta(0.345, ss.proportion('q1', 0), 0.01)
  end

  # Sizes, mean and standard error for a scale variable over two equally
  # sized strata; both standard-error implementations must agree.
  def test_stratum_scale
    boys = {'test' => [50, 55, 60, 62, 62, 65, 67, 67, 70, 70, 73, 73, 75, 78, 78, 80, 85, 90].to_vector(:scale)}.to_dataset
    girls = {'test' => [70, 70, 72, 72, 75, 75, 78, 78, 80, 80, 82, 82, 85, 85, 88, 88, 90, 90].to_vector(:scale)}.to_dataset
    ms = Statsample::Multiset.new(['test'])
    ms.add_dataset('boys', boys)
    ms.add_dataset('girls', girls)
    ss = Statsample::StratifiedSample.new(ms, {'boys' => 10000, 'girls' => 10000})
    assert_equal(2, ss.strata_number)
    assert_equal(20000, ss.population_size)
    assert_equal(10000, ss.stratum_size('boys'))
    assert_equal(10000, ss.stratum_size('girls'))
    assert_equal(36, ss.sample_size)
    assert_equal(75, ss.mean('test'))
    assert_in_delta(1.45, ss.standard_error_wor('test'), 0.01)
    assert_in_delta(ss.standard_error_wor('test'), ss.standard_error_wor_2('test'), 0.00001)
  end
end
@@ -0,0 +1,108 @@
1
+ require File.dirname(__FILE__)+'/../lib/statsample'
2
+ require 'test/unit'
3
# Tests for simple and multiple regression in Statsample::Regression.
class StatsampleRegressionTestCase < Test::Unit::TestCase
  # Builds the simple-regression fixture used by test_parameters.
  def setup
    @x = [13, 20, 10, 33, 15].to_vector(:scale)
    @y = [23, 18, 35, 10, 27].to_vector(:scale)
    @reg = Statsample::Regression::SimpleRegression.new_from_vectors(@x, @y)
  end

  # Intercept, slope and standard error of the simple regression of @y on @x.
  def test_parameters
    assert_in_delta(40.009, @reg.a, 0.001)
    assert_in_delta(-0.957, @reg.b, 0.001)
    assert_in_delta(4.248, @reg.standard_error, 0.002)
  end

  # Pairwise multiple regression on data with nil values scattered over all
  # four variables.
  def test_multiple_regression_pairwise_2
    a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 3, nil, 3, nil, 3].to_vector(:scale)
    b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4, 2, 2, nil, 6, 2].to_vector(:scale)
    c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100, nil, 3, 7, nil, 7].to_vector(:scale)
    y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 30, 40, nil, 50, nil].to_vector(:scale)
    ds = {'a' => a, 'b' => b, 'c' => c, 'y' => y}.to_dataset
    lr = Statsample::Regression::MultipleRegressionPairwise.new(ds, 'y')
    assert_in_delta(2407.436, lr.sst, 0.001)
    assert_in_delta(0.752, lr.r, 0.001)
    assert_in_delta(0.565, lr.r2, 0.001)
    assert_in_delta(1361.130, lr.ssr, 0.001)
    assert_in_delta(1046.306, lr.sse, 0.001)
    assert_in_delta(3.035, lr.f, 0.001)
  end

  # Alglib-backed multiple regression; skipped with a notice when the flag is
  # false.
  # NOTE(review): HAS_ALGIB is spelled exactly as in the original test and is
  # defined outside this file -- confirm the spelling against the library.
  def test_multiple_regression_alglib
    unless HAS_ALGIB
      puts "Regression::MultipleRegressionAlglib not tested (no Alglib)"
      return
    end
    a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7].to_vector(:scale)
    b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4].to_vector(:scale)
    c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100].to_vector(:scale)
    y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30].to_vector(:scale)
    ds = {'a' => a, 'b' => b, 'c' => c, 'y' => y}.to_dataset
    lr = Statsample::Regression::MultipleRegressionAlglib.new(ds, 'y')
    model_test(lr)
    predicted = [1.7857, 6.0989, 3.2433, 7.2908, 4.9667, 10.3428, 8.8158, 10.4717, 23.6639, 25.3198]
    assert_each_in_delta(predicted, lr.predicted)
    residuals = [1.2142, -2.0989, 1.7566, -1.29085, 2.033, -2.3428, 0.18414, -0.47177, -3.66395, 4.6801]
    assert_each_in_delta(residuals, lr.residuals)
  end

  # Shared expectations for any multiple-regression object fitted on the
  # ten-case a/b/c => y data used by the alglib and pairwise tests.
  # lr:: the fitted regression object under test.
  def model_test(lr)
    assert_in_delta(0.695, lr.coeffs['a'], 0.001)
    assert_in_delta(11.027, lr.constant, 0.001)
    assert_in_delta(1.785, lr.process([1, 3, 11]), 0.001)

    assert_each_in_delta({'a' => 0.151, 'b' => -0.547, 'c' => 0.997}, lr.standarized_coeffs)
    assert_in_delta(639.6, lr.sst, 0.001)
    assert_in_delta(583.76, lr.ssr, 0.001)
    assert_in_delta(55.840, lr.sse, 0.001)
    assert_in_delta(0.955, lr.r, 0.001)
    assert_in_delta(0.913, lr.r2, 0.001)
    assert_in_delta(20.908, lr.f, 0.001)
    if HAS_GSL
      assert_in_delta(0.001, lr.significance, 0.001)
    else
      puts "#{lr.class}#significance not tested (not GSL)"
    end
    assert_in_delta(0.226, lr.tolerance("a"), 0.001)
    assert_each_in_delta({"a" => 1.171, "b" => 1.129, "c" => 0.072}, lr.coeffs_se)
    assert_each_in_delta({"a" => 0.594, "b" => -3.796, "c" => 3.703}, lr.coeffs_t)
    assert_in_delta(4.559, lr.constant_se, 0.001)
    assert_in_delta(2.419, lr.constant_t, 0.001)
  end

  # Pairwise multiple regression on the same data as the alglib test, with one
  # all-nil case prepended.
  def test_regression_pairwise
    a = [nil, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7].to_vector(:scale)
    b = [nil, 3, 3, 4, 4, 5, 5, 6, 6, 4, 4].to_vector(:scale)
    c = [nil, 11, 22, 30, 40, 50, 65, 78, 79, 99, 100].to_vector(:scale)
    y = [nil, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30].to_vector(:scale)
    ds = {'a' => a, 'b' => b, 'c' => c, 'y' => y}.to_dataset
    lr = Statsample::Regression::MultipleRegressionPairwise.new(ds, 'y')
    model_test(lr)
    # The leading nil is compared too, exactly as in the original loop.
    predicted = [nil, 1.7857, 6.0989, 3.2433, 7.2908, 4.9667, 10.3428, 8.8158, 10.4717, 23.6639, 25.3198]
    assert_each_in_delta(predicted, lr.predicted)
    residuals = [nil, 1.2142, -2.0989, 1.7566, -1.29085, 2.033, -2.3428, 0.18414, -0.47177, -3.66395, 4.6801]
    assert_each_in_delta(residuals, lr.residuals)
  end

  private

  # Asserts expected[k] and actual[k] agree within delta for every key of a
  # Hash, or every index of an Array, taken from +expected+.
  def assert_each_in_delta(expected, actual, delta = 0.001)
    keys = expected.respond_to?(:keys) ? expected.keys : (0...expected.size).to_a
    keys.each do |k|
      assert_in_delta(expected[k], actual[k], delta)
    end
  end
end
@@ -0,0 +1,32 @@
1
+ require File.dirname(__FILE__)+'/../lib/statsample'
2
+ require 'test/unit'
3
+
4
# Tests for Statsample::Reliability (item analysis and item characteristic
# curve).
class StatsampleReliabilityTestCase < Test::Unit::TestCase
  # Builds a four-item dataset of twelve cases before every test. The last
  # case holds much larger values (30, 50, 40, 30) than the rest.
  def setup
    @x1 = [1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 30].to_vector(:scale)
    @x2 = [1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 50].to_vector(:scale)
    @x3 = [2, 2, 1, 1, 1, 2, 2, 2, 3, 4, 5, 40].to_vector(:scale)
    @x4 = [1, 2, 3, 4, 4, 4, 4, 3, 4, 4, 5, 30].to_vector(:scale)
    @ds = {'x1' => @x1, 'x2' => @x2, 'x3' => @x3, 'x4' => @x4}.to_dataset
  end

  # Alpha, standardized alpha, item-total correlation and the
  # variance-if-deleted statistic for item x1.
  def test_general
    ia = Statsample::Reliability::ItemAnalysis.new(@ds)
    assert_in_delta(0.980, ia.alpha, 0.001)
    assert_in_delta(0.999, ia.alpha_standarized, 0.001)
    assert_in_delta(0.999, ia.item_total_correlation()['x1'], 0.001)
    assert_in_delta(1050.455, ia.stats_if_deleted()['x1'][:variance_sample], 0.001)
  end

  # Only checks that the curve object can be built from the dataset.
  # TODO: add real assertions, e.g. on curve_field('x1', value) for the values
  # 1, 2, 3 and 30 that the original author was inspecting by hand.
  def test_icc
    Statsample::Reliability::ItemCharacteristicCurve.new(@ds)
  end
end
@@ -0,0 +1,23 @@
1
+ require File.dirname(__FILE__)+'/../lib/statsample'
2
+ require 'test/unit'
3
+
4
# Tests for the Statsample::Resample helpers.
class StatsampleResampleTestCase < Test::Unit::TestCase
  # generate(20,1,10) must return 20 values, none below 1 or above 10.
  def test_basic
    sample = Statsample::Resample.generate(20, 1, 10)
    assert_equal(20, sample.size)
    assert(sample.min >= 1)
    assert(sample.max <= 10)
  end

  # Runs the block 400 times, each time counting how many 1s a fresh sample
  # contains, then checks how many of those counts are above 3.
  # The samples are generated anew on every run, so the accepted 30..70 band
  # is a tolerance rather than an exact expectation.
  def test_repeat_and_save
    counts = Statsample::Resample.repeat_and_save(400) do
      Statsample::Resample.generate(20, 1, 10).count(1)
    end
    assert_equal(400, counts.size)
    vector = Statsample::Vector.new(counts, :scale)
    above_three = vector.count { |value| value > 3 }
    assert(above_three.between?(30, 70))
  end
end
data/test/test_srs.rb ADDED
@@ -0,0 +1,14 @@
1
+ require File.dirname(__FILE__)+'/../lib/statsample'
2
+ require 'test/unit'
3
+
4
# Tests for the simple random sampling helpers in Statsample::SRS.
class StatsampleSrsTestCase < Test::Unit::TestCase
  # Sample-size estimation is only checked when HAS_GSL is true; the
  # proportion standard deviation is always checked.
  def test_std_error
    if HAS_GSL
      n0 = Statsample::SRS.estimation_n0(0.05, 0.5, 0.95)
      assert_equal(384, n0.to_i)
      n = Statsample::SRS.estimation_n(0.05, 0.5, 150, 0.95)
      assert_equal(108, n.to_i)
    else
      puts "Statsample::SRS.estimation_n0 not tested (needs ruby-gsl)"
    end
    sd = Statsample::SRS.proportion_sd_kp_wor(0.5, 100, 150)
    assert_in_delta(0.0289, sd, 0.001)
  end
end
@@ -0,0 +1,152 @@
1
+ require File.dirname(__FILE__)+'/../lib/statsample'
2
+ require 'test/unit'
3
+
4
# Tests for the bivariate statistics, chi-square test, SRS estimation and
# simple regression helpers.
class StatsampleStatisicsTestCase < Test::Unit::TestCase
  # chi_square raises TypeError for plain numbers and gives about 32.53 for
  # the observed/expected matrices below.
  def test_chi_square
    assert_raise(TypeError) { Statsample::Test.chi_square(1, 1) }
    real = Matrix[[95, 95], [45, 155]]
    expected = Matrix[[68, 122], [72, 128]]
    assert_nothing_raised do
      chi = Statsample::Test.chi_square(real, expected)
    end
    chi = Statsample::Test.chi_square(real, expected)
    assert_in_delta(32.53, chi, 0.1)
  end

  def test_sum_of_codeviated
    first = [1, 2, 3, 4, 5, 6].to_vector(:scale)
    second = [6, 2, 4, 10, 12, 8].to_vector(:scale)
    assert_equal(23.0, Statsample::Bivariate.sum_of_codeviated(first, second))
  end

  # The second pair holds the same eight paired values as the first, plus
  # cases where one of the two sides is nil; the coefficient must be the same.
  def test_pearson
    clean_a = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:scale)
    clean_b = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:scale)
    assert_in_delta(0.525, Statsample::Bivariate.pearson(clean_a, clean_b), 0.001)
    gappy_a = [6, 2, 1000, 1000, 5, 4, 7, 8, 4, 3, 2, nil].to_vector(:scale)
    gappy_b = [2, nil, nil, nil, 3, 7, 8, 6, 4, 3, 2, 500].to_vector(:scale)
    assert_in_delta(0.525, Statsample::Bivariate.pearson(gappy_a, gappy_b), 0.001)
  end

  # correlation_matrix(ds) must match, cell by cell, the matrix of pairwise
  # pearson coefficients computed directly from the four vectors.
  def test_matrix_correlation
    v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:scale)
    v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:scale)
    v3 = [6, 2, 1000, 1000, 5, 4, 7, 8].to_vector(:scale)
    v4 = [2, nil, nil, nil, 3, 7, 8, 6].to_vector(:scale)
    ds = {'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4}.to_dataset
    vectors = [v1, v2, v3, v4]
    # Row-major 4x4 grid: cell (i, j) is pearson(vectors[i], vectors[j]).
    rows = vectors.map do |row_vector|
      vectors.map { |col_vector| Statsample::Bivariate.pearson(row_vector, col_vector) }
    end
    expected = Matrix[*rows]
    obt = Statsample::Bivariate.correlation_matrix(ds)
    expected.row_size.times do |i|
      expected.column_size.times do |j|
        assert_in_delta(expected[i, j], obt[i, j], 0.0001, "#{expected[i,j].class}!=#{obt[i,j].class} ")
      end
    end
  end

  def test_prop_pearson
    if HAS_GSL
      t_first = Statsample::Bivariate.t_r(0.084, 94)
      assert_in_delta(0.42, Statsample::Bivariate.prop_pearson(t_first, 94), 0.01)
      t_second = Statsample::Bivariate.t_r(0.046, 95)
      assert_in_delta(0.65, Statsample::Bivariate.prop_pearson(t_second, 95), 0.01)
    else
      puts "Bivariate.prop_pearson not tested (no ruby-gsl)"
    end
  end

  # covariance and covariance_slow must agree; only checked when HAS_GSL is
  # true.
  def test_covariance
    if HAS_GSL
      first = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:scale)
      second = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:scale)
      fast = Statsample::Bivariate.covariance(first, second)
      slow = Statsample::Bivariate.covariance_slow(first, second)
      assert_in_delta(fast, slow, 0.001)
    end
  end

  def test_spearman
    first = [86, 97, 99, 100, 101, 103, 106, 110, 112, 113].to_vector(:scale)
    second = [0, 20, 28, 27, 50, 29, 7, 17, 6, 12].to_vector(:scale)
    assert_in_delta(-0.175758, Statsample::Bivariate.spearman(first, second), 0.0001)
  end

  # point_biserial(c, d) must raise TypeError; with the arguments swapped it
  # must equal pearson.
  def test_point_biserial
    c = [1, 3, 5, 6, 7, 100, 200, 300, 400, 300].to_vector(:scale)
    d = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0].to_vector(:scale)
    assert_raise(TypeError) { Statsample::Bivariate.point_biserial(c, d) }
    biserial = Statsample::Bivariate.point_biserial(d, c)
    assert_in_delta(biserial, Statsample::Bivariate.pearson(d, c), 0.0001)
  end

  def test_tau
    rank_a = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11].to_vector(:ordinal)
    rank_b = [1, 3, 4, 5, 7, 8, 2, 9, 10, 6, 11].to_vector(:ordinal)
    assert_in_delta(0.6727, Statsample::Bivariate.tau_a(rank_a, rank_b), 0.001)
    untied_table = Statsample::Crosstab.new(rank_a, rank_b).to_matrix
    assert_in_delta(0.6727, Statsample::Bivariate.tau_b(untied_table), 0.001)
    tied_a = [12, 14, 14, 17, 19, 19, 19, 19, 19, 20, 21, 21, 21, 21, 21, 22, 23, 24, 24, 24, 26, 26, 27].to_vector(:ordinal)
    tied_b = [11, 4, 4, 2, 0, 0, 0, 0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0].to_vector(:ordinal)
    tied_table = Statsample::Crosstab.new(tied_a, tied_b).to_matrix
    assert_in_delta(-0.376201540231705, Statsample::Bivariate.tau_b(tied_table), 0.001)
  end

  def test_gamma
    small = Matrix[[10, 5, 2], [10, 15, 20]]
    assert_in_delta(0.636, Statsample::Bivariate.gamma(small), 0.001)
    large = Matrix[[15, 12, 6, 5], [12, 8, 10, 8], [4, 6, 9, 10]]
    assert_in_delta(0.349, Statsample::Bivariate.gamma(large), 0.001)
  end

  # Checks the fixture's size and sum; the confidence interval is computed
  # when HAS_GSL is true but its limits are not asserted.
  def test_estimation_mean
    v = ([42]*23+[41]*4+[36]*1+[32]*1+[29]*1+[27]*2+[23]*1+[19]*1+[16]*2+[15]*2+[14,11,10,9,7]+ [6]*3+[5]*2+[4,3]).to_vector(:scale)
    assert_equal(50, v.size)
    assert_equal(1471, v.sum())
    if HAS_GSL
      limits = Statsample::SRS.mean_confidence_interval_z(v.mean(), v.sds(), v.size, 676, 0.80)
    else
      puts "SRS.mean_confidence_interval_z not tested (no ruby-gsl)"
    end
  end

  def test_estimation_proportion
    # total
    total_sd = Statsample::SRS.proportion_total_sd_ep_wor(0.19, 200, 3042)
    assert_in_delta(81.8, total_sd, 0.1)

    # confidence limits
    if HAS_GSL
      lower, upper = Statsample::SRS.proportion_confidence_interval_z(0.37, 100, 500, 0.95)
      assert_in_delta(0.28, lower, 0.01)
      assert_in_delta(0.46, upper, 0.01)
    else
      puts "SRS.proportion_confidence_interval_z not tested (no ruby-gsl)"
    end
  end

  # ssr + sse must equal sst, r must equal pearson, and the fitted parameters
  # must match the reference values.
  def test_simple_linear_regression
    a = [1, 2, 3, 4, 5, 6].to_vector(:scale)
    b = [6, 2, 4, 10, 12, 8].to_vector(:scale)
    reg = Statsample::Regression::SimpleRegression.new_from_vectors(a, b)
    explained_plus_error = (reg.ssr + reg.sse).to_f
    assert_in_delta(explained_plus_error, reg.sst, 0.001)
    assert_in_delta(Statsample::Bivariate.pearson(a, b), reg.r, 0.001)
    assert_in_delta(2.4, reg.a, 0.01)
    assert_in_delta(1.314, reg.b, 0.001)
    assert_in_delta(0.657, reg.r, 0.001)
    assert_in_delta(0.432, reg.r2, 0.001)
  end

  # The name does not start with "test"; it only builds the regression object
  # and asserts nothing.
  def a_test_multiple_regression
    predictors = [
      [1, 2, 3, 4, 5, 6].to_vector(:scale),
      [3, 5, 8, 9, 10, 20].to_vector(:scale),
      [100, 90, 50, 30, 50, 10].to_vector(:scale)
    ]
    y = [6, 2, 4, 10, 12, 8].to_vector(:scale)
    reg = Statsample::Regression::MultipleRegression.new_from_vectors(predictors, y)
  end
end