statsample 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. data/History.txt +7 -0
  2. data/Manifest.txt +15 -9
  3. data/README.txt +6 -0
  4. data/Rakefile +8 -0
  5. data/{demo → examples}/correlation_matrix.rb +0 -0
  6. data/{demo/dominanceanalysis.rb → examples/dominance_analysis.rb} +0 -0
  7. data/{demo → examples}/dominance_analysis_bootstrap.rb +0 -0
  8. data/{demo → examples}/levene.rb +0 -0
  9. data/{demo → examples}/multiple_regression.rb +5 -3
  10. data/{demo → examples}/multivariate_correlation.rb +0 -0
  11. data/{demo → examples}/polychoric.rb +0 -0
  12. data/{demo → examples}/principal_axis.rb +0 -0
  13. data/examples/t_test.rb +11 -0
  14. data/{demo → examples}/tetrachoric.rb +0 -0
  15. data/lib/statistics2.rb +1 -1
  16. data/lib/statsample.rb +57 -6
  17. data/lib/statsample/bivariate/polychoric.rb +12 -25
  18. data/lib/statsample/bivariate/tetrachoric.rb +1 -3
  19. data/lib/statsample/converter/csv.rb +11 -12
  20. data/lib/statsample/dominanceanalysis/bootstrap.rb +2 -3
  21. data/lib/statsample/factor/principalaxis.rb +0 -2
  22. data/lib/statsample/factor/rotation.rb +6 -8
  23. data/lib/statsample/graph.rb +8 -0
  24. data/lib/statsample/graph/svggraph.rb +0 -4
  25. data/lib/statsample/regression/multiple/baseengine.rb +25 -28
  26. data/lib/statsample/regression/multiple/matrixengine.rb +30 -34
  27. data/lib/statsample/test.rb +36 -1
  28. data/lib/statsample/test/levene.rb +11 -7
  29. data/lib/statsample/test/t.rb +189 -0
  30. data/test/test_anova.rb +8 -10
  31. data/test/test_bivariate.rb +40 -37
  32. data/test/test_codification.rb +9 -13
  33. data/test/test_combination.rb +37 -39
  34. data/test/test_crosstab.rb +46 -48
  35. data/test/test_csv.rb +40 -45
  36. data/test/test_dataset.rb +150 -152
  37. data/test/test_distribution.rb +24 -21
  38. data/test/test_dominance_analysis.rb +10 -12
  39. data/test/test_factor.rb +95 -91
  40. data/test/test_ggobi.rb +30 -33
  41. data/test/test_gsl.rb +4 -4
  42. data/test/test_helpers.rb +26 -0
  43. data/test/test_histogram.rb +5 -6
  44. data/test/test_logit.rb +20 -21
  45. data/test/test_matrix.rb +47 -48
  46. data/test/test_mle.rb +130 -131
  47. data/test/test_multiset.rb +95 -96
  48. data/test/test_permutation.rb +35 -36
  49. data/test/test_promise_after.rb +39 -0
  50. data/test/test_regression.rb +49 -51
  51. data/test/test_reliability.rb +29 -30
  52. data/test/test_resample.rb +22 -23
  53. data/test/test_srs.rb +8 -9
  54. data/test/test_statistics.rb +12 -6
  55. data/test/test_stest.rb +18 -10
  56. data/test/test_stratified.rb +15 -16
  57. data/test/test_svg_graph.rb +11 -22
  58. data/test/test_test_t.rb +40 -0
  59. data/test/test_umannwhitney.rb +14 -15
  60. data/test/test_vector.rb +33 -37
  61. data/test/test_xls.rb +34 -41
  62. metadata +22 -11
data/test/test_mle.rb CHANGED
@@ -1,140 +1,139 @@
1
- $:.unshift(File.dirname(__FILE__)+'/../lib/')
2
- require 'statsample'
3
- require 'test/unit'
4
- class StatsampleMLETestCase < Test::Unit::TestCase
5
- def setup
6
- @file_binomial=File.dirname(__FILE__)+'/../data/test_binomial.csv'
7
- @crime=File.dirname(__FILE__)+'/../data/crime.txt'
8
- @cases=100
9
- a=Array.new()
10
- b=Array.new()
11
- c=Array.new()
12
- y=Array.new()
1
+ require(File.dirname(__FILE__)+'/test_helpers.rb')
13
2
 
14
- @cases.times{|i|
15
- a.push(2*rand()-i)
16
- b.push(2*rand()-5+i)
17
- c.push(2*rand()+i)
18
- y_val=i+(rand()*@cases.quo(2) - @cases.quo(4))
19
- y.push(y_val<(@cases/2.0) ? 0.0 : 1.0)
20
- }
21
- a=a.to_vector(:scale)
22
- b=b.to_vector(:scale)
23
- c=c.to_vector(:scale)
24
- y=y.to_vector(:scale)
25
-
26
- @ds_indep={'a'=>a,'b'=>b,'c'=>c}.to_dataset
27
- constant=([1.0]*@cases).to_vector(:scale)
28
- @ds_indep_2={'constant'=>constant,'a'=>a,'b'=>b,'c'=>c}.to_dataset
29
- @ds_indep_2.fields=%w{constant a b c}
30
- @mat_x=@ds_indep_2.to_matrix
31
- @mat_y=y.to_matrix(:vertical)
32
- @ds=@ds_indep.dup
33
- @ds.add_vector('y',y)
34
- end
35
- def test_normal
36
- y=Array.new()
37
- y=@ds_indep.collect{|row|
38
- row['a']*5+row['b']+row['c']+rand()*3
39
- }
40
- constant=([1]*@cases).to_vector(:scale)
41
- ds_indep_2=@ds_indep.dup
42
- ds_indep_2['constant']=constant
43
- ds_indep_2.fields=%w{constant a b c}
44
- mat_x=ds_indep_2.to_matrix
45
- mat_y=y.to_matrix(:vertical)
46
- mle=Statsample::MLE::Normal.new()
47
- mle.verbose=false
48
- coeffs_nr=mle.newton_raphson(mat_x,mat_y)
49
- #p coeffs_nr
50
- ds=@ds_indep.dup
51
- ds.add_vector('y',y)
52
- lr=Statsample::Regression.multiple(ds,'y')
53
- lr_constant = lr.constant
54
- lr_coeffs = lr.coeffs
55
- assert_in_delta(coeffs_nr[0,0], lr_constant,0.0000001)
56
- assert_in_delta(coeffs_nr[1,0], lr_coeffs["a"],0.0000001)
57
- assert_in_delta(coeffs_nr[2,0], lr_coeffs["b"],0.0000001)
58
- assert_in_delta(coeffs_nr[3,0], lr_coeffs["c"],0.0000001)
59
- end
60
-
61
- def test_probit
62
- ds=Statsample::CSV.read(@file_binomial)
63
- constant=([1.0]*ds.cases).to_vector(:scale)
64
- ds_indep={'constant'=>constant, 'a'=>ds['a'],'b'=>ds['b'], 'c'=>ds['c']}.to_dataset(%w{constant a b c})
65
- mat_x=ds_indep.to_matrix
66
- mat_y=ds['y'].to_matrix(:vertical)
67
- mle=Statsample::MLE::Probit.new
68
- b_probit=mle.newton_raphson(mat_x,mat_y)
69
- ll=mle.log_likehood(mat_x,mat_y,b_probit)
70
-
71
- b_exp=[-3.0670,0.1763,0.4483,-0.2240]
72
- b_exp.each_index{|i|
73
- assert_in_delta(b_exp[i], b_probit[i,0], 0.001)
74
- }
75
- assert_in_delta(-38.31559,ll,0.0001)
76
- end
77
- def test_logit_crime
78
- ds=Statsample::PlainText.read(@crime, %w{crimerat maleteen south educ police60 police59 labor males pop nonwhite unemp1 unemp2 median belowmed})
79
- constant=([1.0]*ds.cases).to_vector(:scale)
80
- ds2=ds.dup(%w{maleteen south educ police59})
81
- ds2['constant']=constant
82
- ds2.fields=%w{constant maleteen south educ police59}
83
- mat_x=ds2.to_matrix
84
- mat_y=(ds.compute "(crimerat>=110) ? 1:0").to_matrix(:vertical)
85
- mle=Statsample::MLE::Logit.new
86
- b=mle.newton_raphson(mat_x,mat_y)
87
- ll=mle.log_likehood(mat_x,mat_y,b)
88
- assert_in_delta(-18.606959,ll,0.001)
89
- exp=[-17.701, 0.0833,-1.117, 0.0229, 0.0581]
90
- exp.each_index{|i|
91
- assert_in_delta(exp[i],b[i,0],0.001)
92
- }
93
- assert_equal(5,mle.iterations)
94
- end
95
- def atest_logit_alglib
96
- if(HAS_ALGIB)
97
- ds=Statsample::CSV.read(@file_binomial)
98
- constant=([1.0]*ds.cases).to_vector(:scale)
99
-
100
- ds_indep={'constant'=>constant, 'a'=>ds['a'],'b'=>ds['b'], 'c'=>ds['c']}.to_dataset(%w{constant a b c} )
101
-
102
- mat_x=ds_indep.to_matrix
103
- mat_y=ds['y'].to_matrix(:vertical)
104
- log=Alglib::Logit.build_from_matrix(ds.to_matrix)
105
- coeffs=log.unpack[0]
106
- b_alglib=Matrix.columns([[-coeffs[3], -coeffs[0], -coeffs[1], -coeffs[2]]])
107
- mle=Statsample::MLE::Logit.new
108
- ll_alglib=mle.log_likehood(mat_x,mat_y,b_alglib)
109
- b_newton=mle.newton_raphson(mat_x,mat_y)
110
- ll_pure_ruby=mle.log_likehood(mat_x,mat_y,b_newton)
111
- #p b_alglib
112
- #p b_newton
113
-
114
- assert_in_delta(ll_alglib,ll_pure_ruby,1)
115
- end
3
+ class StatsampleMLETestCase < MiniTest::Unit::TestCase
4
+ def setup
5
+ @file_binomial=File.dirname(__FILE__)+'/../data/test_binomial.csv'
6
+ @crime=File.dirname(__FILE__)+'/../data/crime.txt'
7
+ @cases=100
8
+ a=Array.new()
9
+ b=Array.new()
10
+ c=Array.new()
11
+ y=Array.new()
116
12
 
117
- end
118
- def atest_logit1
119
- log=Alglib::Logit.build_from_matrix(@ds.to_matrix)
120
- coeffs=log.unpack[0]
121
- b=Matrix.columns([[-coeffs[3],-coeffs[0],-coeffs[1],-coeffs[2]]])
122
- # puts "Coeficientes beta alglib:"
123
- #p b
124
- mle_alglib=Statsample::MLE::ln_mle(Statsample::MLE::Logit, @mat_x,@mat_y,b)
125
- # puts "MLE Alglib:"
126
- #p mle_alglib
127
- # Statsample::CSV.write(ds,"test_binomial.csv")
13
+ @cases.times{|i|
14
+ a.push(2*rand()-i)
15
+ b.push(2*rand()-5+i)
16
+ c.push(2*rand()+i)
17
+ y_val=i+(rand()*@cases.quo(2) - @cases.quo(4))
18
+ y.push(y_val<(@cases/2.0) ? 0.0 : 1.0)
19
+ }
20
+ a=a.to_vector(:scale)
21
+ b=b.to_vector(:scale)
22
+ c=c.to_vector(:scale)
23
+ y=y.to_vector(:scale)
24
+
25
+ @ds_indep={'a'=>a,'b'=>b,'c'=>c}.to_dataset
26
+ constant=([1.0]*@cases).to_vector(:scale)
27
+ @ds_indep_2={'constant'=>constant,'a'=>a,'b'=>b,'c'=>c}.to_dataset
28
+ @ds_indep_2.fields=%w{constant a b c}
29
+ @mat_x=@ds_indep_2.to_matrix
30
+ @mat_y=y.to_matrix(:vertical)
31
+ @ds=@ds_indep.dup
32
+ @ds.add_vector('y',y)
33
+ end
34
+ def test_normal
35
+ y=Array.new()
36
+ y=@ds_indep.collect{|row|
37
+ row['a']*5+row['b']+row['c']+rand()*3
38
+ }
39
+ constant=([1]*@cases).to_vector(:scale)
40
+ ds_indep_2=@ds_indep.dup
41
+ ds_indep_2['constant']=constant
42
+ ds_indep_2.fields=%w{constant a b c}
43
+ mat_x=ds_indep_2.to_matrix
44
+ mat_y=y.to_matrix(:vertical)
45
+ mle=Statsample::MLE::Normal.new()
46
+ mle.verbose=false
47
+ coeffs_nr=mle.newton_raphson(mat_x,mat_y)
48
+ #p coeffs_nr
49
+ ds=@ds_indep.dup
50
+ ds.add_vector('y',y)
51
+ lr=Statsample::Regression.multiple(ds,'y')
52
+ lr_constant = lr.constant
53
+ lr_coeffs = lr.coeffs
54
+ assert_in_delta(coeffs_nr[0,0], lr_constant,0.0000001)
55
+ assert_in_delta(coeffs_nr[1,0], lr_coeffs["a"],0.0000001)
56
+ assert_in_delta(coeffs_nr[2,0], lr_coeffs["b"],0.0000001)
57
+ assert_in_delta(coeffs_nr[3,0], lr_coeffs["c"],0.0000001)
58
+ end
128
59
 
60
+ def test_probit
61
+ ds=Statsample::CSV.read(@file_binomial)
62
+ constant=([1.0]*ds.cases).to_vector(:scale)
63
+ ds_indep={'constant'=>constant, 'a'=>ds['a'],'b'=>ds['b'], 'c'=>ds['c']}.to_dataset(%w{constant a b c})
64
+ mat_x=ds_indep.to_matrix
65
+ mat_y=ds['y'].to_matrix(:vertical)
66
+ mle=Statsample::MLE::Probit.new
67
+ b_probit=mle.newton_raphson(mat_x,mat_y)
68
+ ll=mle.log_likehood(mat_x,mat_y,b_probit)
129
69
 
70
+ b_exp=[-3.0670,0.1763,0.4483,-0.2240]
71
+ b_exp.each_index{|i|
72
+ assert_in_delta(b_exp[i], b_probit[i,0], 0.001)
73
+ }
74
+ assert_in_delta(-38.31559,ll,0.0001)
75
+ end
76
+ def test_logit_crime
77
+ ds=Statsample::PlainText.read(@crime, %w{crimerat maleteen south educ police60 police59 labor males pop nonwhite unemp1 unemp2 median belowmed})
78
+ constant=([1.0]*ds.cases).to_vector(:scale)
79
+ ds2=ds.dup(%w{maleteen south educ police59})
80
+ ds2['constant']=constant
81
+ ds2.fields=%w{constant maleteen south educ police59}
82
+ mat_x=ds2.to_matrix
83
+ mat_y=(ds.compute "(crimerat>=110) ? 1:0").to_matrix(:vertical)
84
+ mle=Statsample::MLE::Logit.new
85
+ b=mle.newton_raphson(mat_x,mat_y)
86
+ ll=mle.log_likehood(mat_x,mat_y,b)
87
+ assert_in_delta(-18.606959,ll,0.001)
88
+ exp=[-17.701, 0.0833,-1.117, 0.0229, 0.0581]
89
+ exp.each_index{|i|
90
+ assert_in_delta(exp[i],b[i,0],0.001)
91
+ }
92
+ assert_equal(5,mle.iterations)
93
+ end
94
+ def atest_logit_alglib
95
+ if(HAS_ALGIB)
96
+ ds=Statsample::CSV.read(@file_binomial)
97
+ constant=([1.0]*ds.cases).to_vector(:scale)
130
98
 
131
- # puts "iniciando newton"
132
- coeffs_nr=Statsample::MLE.newton_raphson(@mat_x,@mat_y, Statsample::MLE::Logit)
133
- #p coeffs_nr
134
- mle_pure_ruby=Statsample::MLE::ln_mle(Statsample::MLE::Logit, @mat_x,@mat_y,coeffs_nr)
135
- #p mle_pure_ruby
99
+ ds_indep={'constant'=>constant, 'a'=>ds['a'],'b'=>ds['b'], 'c'=>ds['c']}.to_dataset(%w{constant a b c} )
136
100
 
137
- #puts "Malo: #{mle_malo} Bueno: #{mle_bueno} : #{mle_malo-mle_bueno}"
101
+ mat_x=ds_indep.to_matrix
102
+ mat_y=ds['y'].to_matrix(:vertical)
103
+ log=Alglib::Logit.build_from_matrix(ds.to_matrix)
104
+ coeffs=log.unpack[0]
105
+ b_alglib=Matrix.columns([[-coeffs[3], -coeffs[0], -coeffs[1], -coeffs[2]]])
106
+ mle=Statsample::MLE::Logit.new
107
+ ll_alglib=mle.log_likehood(mat_x,mat_y,b_alglib)
108
+ b_newton=mle.newton_raphson(mat_x,mat_y)
109
+ ll_pure_ruby=mle.log_likehood(mat_x,mat_y,b_newton)
110
+ #p b_alglib
111
+ #p b_newton
112
+
113
+ assert_in_delta(ll_alglib,ll_pure_ruby,1)
138
114
  end
115
+
116
+ end
117
+ def atest_logit1
118
+ log=Alglib::Logit.build_from_matrix(@ds.to_matrix)
119
+ coeffs=log.unpack[0]
120
+ b=Matrix.columns([[-coeffs[3],-coeffs[0],-coeffs[1],-coeffs[2]]])
121
+ # puts "Coeficientes beta alglib:"
122
+ #p b
123
+ mle_alglib=Statsample::MLE::ln_mle(Statsample::MLE::Logit, @mat_x,@mat_y,b)
124
+ # puts "MLE Alglib:"
125
+ #p mle_alglib
126
+ # Statsample::CSV.write(ds,"test_binomial.csv")
127
+
128
+
129
+
130
+ # puts "iniciando newton"
131
+ coeffs_nr=Statsample::MLE.newton_raphson(@mat_x,@mat_y, Statsample::MLE::Logit)
132
+ #p coeffs_nr
133
+ mle_pure_ruby=Statsample::MLE::ln_mle(Statsample::MLE::Logit, @mat_x,@mat_y,coeffs_nr)
134
+ #p mle_pure_ruby
135
+
136
+ #puts "Malo: #{mle_malo} Bueno: #{mle_bueno} : #{mle_malo-mle_bueno}"
137
+ end
139
138
  end
140
139
 
@@ -1,98 +1,97 @@
1
- $:.unshift(File.dirname(__FILE__)+'/../lib/')
2
- require 'statsample'
3
- require 'test/unit'
1
+ require(File.dirname(__FILE__)+'/test_helpers.rb')
4
2
 
5
- class StatsampleMultisetTestCase < Test::Unit::TestCase
6
- def initialize(*args)
7
- super
8
- end
9
- def test_creation
10
- v1a=[1,2,3,4,5].to_vector
11
- v2b=[11,21,31,41,51].to_vector
12
- v3c=[21,23,34,45,56].to_vector
13
- ds1={'v1'=>v1a,'v2'=>v2b,'v3'=>v3c}.to_dataset
14
- v1b=[15,25,35,45,55].to_vector
15
- v2b=[11,21,31,41,51].to_vector
16
- v3b=[21,23,34,45,56].to_vector
17
- ds2={'v1'=>v1b,'v2'=>v2b,'v3'=>v3b}.to_dataset
18
- ms=Statsample::Multiset.new(['v1','v2','v3'])
19
- ms.add_dataset('ds1',ds1)
20
- ms.add_dataset('ds2',ds2)
21
- assert_equal(ds1,ms['ds1'])
22
- assert_equal(ds2,ms['ds2'])
23
- assert_equal(v1a,ms['ds1']['v1'])
24
- assert_not_equal(v1b,ms['ds1']['v1'])
25
- ds3={'v1'=>v1b,'v2'=>v2b}.to_dataset
26
- assert_raise ArgumentError do
27
- ms.add_dataset(ds3)
28
- end
29
- end
30
- def test_creation_empty
31
- ms=Statsample::Multiset.new_empty_vectors(%w{id age name},%w{male female})
32
- ds_male={'id'=>[].to_vector,'age'=>[].to_vector, 'name'=>[].to_vector}.to_dataset(%w{id age name})
33
- ds_female={'id'=>[].to_vector,'age'=>[].to_vector, 'name'=>[].to_vector}.to_dataset(%w{id age name})
34
- ms2=Statsample::Multiset.new(%w{id age name})
35
- ms2.add_dataset('male',ds_male)
36
- ms2.add_dataset('female',ds_female)
37
- assert_equal(ms2.fields,ms.fields)
38
- assert_equal(ms2['male'],ms['male'])
39
- assert_equal(ms2['female'],ms['female'])
40
- end
41
- def test_to_multiset_by_split_one
42
- sex=%w{m m m m m f f f f m}.to_vector(:nominal)
43
- city=%w{London Paris NY London Paris NY London Paris NY Tome}.to_vector(:nominal)
44
- age=[10,10,20,30,34,34,33,35,36,40].to_vector(:scale)
45
- ds={'sex'=>sex,'city'=>city,'age'=>age}.to_dataset
46
- ms=ds.to_multiset_by_split('sex')
47
- assert_equal(2,ms.n_datasets)
48
- assert_equal(%w{f m},ms.datasets.keys.sort)
49
- assert_equal(6,ms['m'].cases)
50
- assert_equal(4,ms['f'].cases)
51
- assert_equal(%w{London Paris NY London Paris Tome},ms['m']['city'].to_a)
52
- assert_equal([34,33,35,36],ms['f']['age'].to_a)
53
- end
54
- def test_to_multiset_by_split_multiple
55
- sex=%w{m m m m m m m m m m f f f f f f f f f f}.to_vector(:nominal)
56
- city=%w{London London London Paris Paris London London London Paris Paris London London London Paris Paris London London London Paris Paris}.to_vector(:nominal)
57
- hair=%w{blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black}.to_vector(:nominal)
58
- age=[10,10,20,30,34,34,33,35,36,40, 10,10,20,30,34,34,33,35,36,40].to_vector(:scale)
59
- ds={'sex'=>sex,'city'=>city,'hair'=>hair,'age'=>age}.to_dataset(%w{sex city hair age})
60
- ms=ds.to_multiset_by_split('sex','city','hair')
61
- assert_equal(8,ms.n_datasets)
62
- assert_equal(3,ms[%w{m London blonde}].cases)
63
- assert_equal(3,ms[%w{m London blonde}].cases)
64
- assert_equal(1,ms[%w{m Paris black}].cases)
65
- end
66
-
67
- def test_stratum_proportion
68
- ds1={'q1'=>[1,1,1,1,1,0,0,0,0,0,0,0].to_vector}.to_dataset
69
- ds2={'q1'=>[1,1,1,1,1,1,1,0,0].to_vector}.to_dataset
70
- assert_equal(5.0/12, ds1['q1'].proportion )
71
- assert_equal(7.0/9, ds2['q1'].proportion )
72
- ms=Statsample::Multiset.new(['q1'])
73
- ms.add_dataset('d1',ds1)
74
- ms.add_dataset('d2',ds2)
75
- ss=Statsample::StratifiedSample.new(ms,{'d1'=>50,'d2'=>100})
76
- assert_in_delta(0.655, ss.proportion('q1'),0.01)
77
- assert_in_delta(0.345, ss.proportion('q1',0),0.01)
78
-
79
- end
80
- def test_stratum_scale
81
- boys={'test'=>[50, 55, 60, 62, 62, 65, 67, 67, 70, 70, 73, 73, 75, 78, 78, 80, 85, 90].to_vector(:scale)}.to_dataset
82
- girls={'test'=>[70, 70, 72, 72, 75, 75, 78, 78, 80, 80, 82, 82, 85, 85, 88, 88, 90, 90].to_vector(:scale)}.to_dataset
83
- ms=Statsample::Multiset.new(['test'])
84
- ms.add_dataset('boys',boys)
85
- ms.add_dataset('girls',girls)
86
- ss=Statsample::StratifiedSample.new(ms,{'boys'=>10000,'girls'=>10000})
87
- assert_equal(2,ss.strata_number)
88
- assert_equal(20000,ss.population_size)
89
- assert_equal(10000,ss.stratum_size('boys'))
90
- assert_equal(10000,ss.stratum_size('girls'))
91
- assert_equal(36,ss.sample_size)
92
- assert_equal(75,ss.mean('test'))
93
- assert_in_delta(1.45,ss.standard_error_wor('test'),0.01)
94
- assert_in_delta(ss.standard_error_wor('test'), ss.standard_error_wor_2('test'),0.00001)
95
-
96
-
3
+
4
+ class StatsampleMultisetTestCase < MiniTest::Unit::TestCase
5
+ def initialize(*args)
6
+ super
7
+ end
8
+ def test_creation
9
+ v1a=[1,2,3,4,5].to_vector
10
+ v2b=[11,21,31,41,51].to_vector
11
+ v3c=[21,23,34,45,56].to_vector
12
+ ds1={'v1'=>v1a,'v2'=>v2b,'v3'=>v3c}.to_dataset
13
+ v1b=[15,25,35,45,55].to_vector
14
+ v2b=[11,21,31,41,51].to_vector
15
+ v3b=[21,23,34,45,56].to_vector
16
+ ds2={'v1'=>v1b,'v2'=>v2b,'v3'=>v3b}.to_dataset
17
+ ms=Statsample::Multiset.new(['v1','v2','v3'])
18
+ ms.add_dataset('ds1',ds1)
19
+ ms.add_dataset('ds2',ds2)
20
+ assert_equal(ds1,ms['ds1'])
21
+ assert_equal(ds2,ms['ds2'])
22
+ assert_equal(v1a,ms['ds1']['v1'])
23
+ assert_not_equal(v1b,ms['ds1']['v1'])
24
+ ds3={'v1'=>v1b,'v2'=>v2b}.to_dataset
25
+ assert_raise ArgumentError do
26
+ ms.add_dataset(ds3)
97
27
  end
98
- end
28
+ end
29
+ def test_creation_empty
30
+ ms=Statsample::Multiset.new_empty_vectors(%w{id age name},%w{male female})
31
+ ds_male={'id'=>[].to_vector,'age'=>[].to_vector, 'name'=>[].to_vector}.to_dataset(%w{id age name})
32
+ ds_female={'id'=>[].to_vector,'age'=>[].to_vector, 'name'=>[].to_vector}.to_dataset(%w{id age name})
33
+ ms2=Statsample::Multiset.new(%w{id age name})
34
+ ms2.add_dataset('male',ds_male)
35
+ ms2.add_dataset('female',ds_female)
36
+ assert_equal(ms2.fields,ms.fields)
37
+ assert_equal(ms2['male'],ms['male'])
38
+ assert_equal(ms2['female'],ms['female'])
39
+ end
40
+ def test_to_multiset_by_split_one
41
+ sex=%w{m m m m m f f f f m}.to_vector(:nominal)
42
+ city=%w{London Paris NY London Paris NY London Paris NY Tome}.to_vector(:nominal)
43
+ age=[10,10,20,30,34,34,33,35,36,40].to_vector(:scale)
44
+ ds={'sex'=>sex,'city'=>city,'age'=>age}.to_dataset
45
+ ms=ds.to_multiset_by_split('sex')
46
+ assert_equal(2,ms.n_datasets)
47
+ assert_equal(%w{f m},ms.datasets.keys.sort)
48
+ assert_equal(6,ms['m'].cases)
49
+ assert_equal(4,ms['f'].cases)
50
+ assert_equal(%w{London Paris NY London Paris Tome},ms['m']['city'].to_a)
51
+ assert_equal([34,33,35,36],ms['f']['age'].to_a)
52
+ end
53
+ def test_to_multiset_by_split_multiple
54
+ sex=%w{m m m m m m m m m m f f f f f f f f f f}.to_vector(:nominal)
55
+ city=%w{London London London Paris Paris London London London Paris Paris London London London Paris Paris London London London Paris Paris}.to_vector(:nominal)
56
+ hair=%w{blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black}.to_vector(:nominal)
57
+ age=[10,10,20,30,34,34,33,35,36,40, 10,10,20,30,34,34,33,35,36,40].to_vector(:scale)
58
+ ds={'sex'=>sex,'city'=>city,'hair'=>hair,'age'=>age}.to_dataset(%w{sex city hair age})
59
+ ms=ds.to_multiset_by_split('sex','city','hair')
60
+ assert_equal(8,ms.n_datasets)
61
+ assert_equal(3,ms[%w{m London blonde}].cases)
62
+ assert_equal(3,ms[%w{m London blonde}].cases)
63
+ assert_equal(1,ms[%w{m Paris black}].cases)
64
+ end
65
+
66
+ def test_stratum_proportion
67
+ ds1={'q1'=>[1,1,1,1,1,0,0,0,0,0,0,0].to_vector}.to_dataset
68
+ ds2={'q1'=>[1,1,1,1,1,1,1,0,0].to_vector}.to_dataset
69
+ assert_equal(5.0/12, ds1['q1'].proportion )
70
+ assert_equal(7.0/9, ds2['q1'].proportion )
71
+ ms=Statsample::Multiset.new(['q1'])
72
+ ms.add_dataset('d1',ds1)
73
+ ms.add_dataset('d2',ds2)
74
+ ss=Statsample::StratifiedSample.new(ms,{'d1'=>50,'d2'=>100})
75
+ assert_in_delta(0.655, ss.proportion('q1'),0.01)
76
+ assert_in_delta(0.345, ss.proportion('q1',0),0.01)
77
+
78
+ end
79
+ def test_stratum_scale
80
+ boys={'test'=>[50, 55, 60, 62, 62, 65, 67, 67, 70, 70, 73, 73, 75, 78, 78, 80, 85, 90].to_vector(:scale)}.to_dataset
81
+ girls={'test'=>[70, 70, 72, 72, 75, 75, 78, 78, 80, 80, 82, 82, 85, 85, 88, 88, 90, 90].to_vector(:scale)}.to_dataset
82
+ ms=Statsample::Multiset.new(['test'])
83
+ ms.add_dataset('boys',boys)
84
+ ms.add_dataset('girls',girls)
85
+ ss=Statsample::StratifiedSample.new(ms,{'boys'=>10000,'girls'=>10000})
86
+ assert_equal(2,ss.strata_number)
87
+ assert_equal(20000,ss.population_size)
88
+ assert_equal(10000,ss.stratum_size('boys'))
89
+ assert_equal(10000,ss.stratum_size('girls'))
90
+ assert_equal(36,ss.sample_size)
91
+ assert_equal(75,ss.mean('test'))
92
+ assert_in_delta(1.45,ss.standard_error_wor('test'),0.01)
93
+ assert_in_delta(ss.standard_error_wor('test'), ss.standard_error_wor_2('test'),0.00001)
94
+
95
+
96
+ end
97
+ end