statsample 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +7 -0
- data/Manifest.txt +15 -9
- data/README.txt +6 -0
- data/Rakefile +8 -0
- data/{demo → examples}/correlation_matrix.rb +0 -0
- data/{demo/dominanceanalysis.rb → examples/dominance_analysis.rb} +0 -0
- data/{demo → examples}/dominance_analysis_bootstrap.rb +0 -0
- data/{demo → examples}/levene.rb +0 -0
- data/{demo → examples}/multiple_regression.rb +5 -3
- data/{demo → examples}/multivariate_correlation.rb +0 -0
- data/{demo → examples}/polychoric.rb +0 -0
- data/{demo → examples}/principal_axis.rb +0 -0
- data/examples/t_test.rb +11 -0
- data/{demo → examples}/tetrachoric.rb +0 -0
- data/lib/statistics2.rb +1 -1
- data/lib/statsample.rb +57 -6
- data/lib/statsample/bivariate/polychoric.rb +12 -25
- data/lib/statsample/bivariate/tetrachoric.rb +1 -3
- data/lib/statsample/converter/csv.rb +11 -12
- data/lib/statsample/dominanceanalysis/bootstrap.rb +2 -3
- data/lib/statsample/factor/principalaxis.rb +0 -2
- data/lib/statsample/factor/rotation.rb +6 -8
- data/lib/statsample/graph.rb +8 -0
- data/lib/statsample/graph/svggraph.rb +0 -4
- data/lib/statsample/regression/multiple/baseengine.rb +25 -28
- data/lib/statsample/regression/multiple/matrixengine.rb +30 -34
- data/lib/statsample/test.rb +36 -1
- data/lib/statsample/test/levene.rb +11 -7
- data/lib/statsample/test/t.rb +189 -0
- data/test/test_anova.rb +8 -10
- data/test/test_bivariate.rb +40 -37
- data/test/test_codification.rb +9 -13
- data/test/test_combination.rb +37 -39
- data/test/test_crosstab.rb +46 -48
- data/test/test_csv.rb +40 -45
- data/test/test_dataset.rb +150 -152
- data/test/test_distribution.rb +24 -21
- data/test/test_dominance_analysis.rb +10 -12
- data/test/test_factor.rb +95 -91
- data/test/test_ggobi.rb +30 -33
- data/test/test_gsl.rb +4 -4
- data/test/test_helpers.rb +26 -0
- data/test/test_histogram.rb +5 -6
- data/test/test_logit.rb +20 -21
- data/test/test_matrix.rb +47 -48
- data/test/test_mle.rb +130 -131
- data/test/test_multiset.rb +95 -96
- data/test/test_permutation.rb +35 -36
- data/test/test_promise_after.rb +39 -0
- data/test/test_regression.rb +49 -51
- data/test/test_reliability.rb +29 -30
- data/test/test_resample.rb +22 -23
- data/test/test_srs.rb +8 -9
- data/test/test_statistics.rb +12 -6
- data/test/test_stest.rb +18 -10
- data/test/test_stratified.rb +15 -16
- data/test/test_svg_graph.rb +11 -22
- data/test/test_test_t.rb +40 -0
- data/test/test_umannwhitney.rb +14 -15
- data/test/test_vector.rb +33 -37
- data/test/test_xls.rb +34 -41
- metadata +22 -11
data/test/test_mle.rb
CHANGED
@@ -1,140 +1,139 @@
|
|
1
|
-
|
2
|
-
require 'statsample'
|
3
|
-
require 'test/unit'
|
4
|
-
class StatsampleMLETestCase < Test::Unit::TestCase
|
5
|
-
def setup
|
6
|
-
@file_binomial=File.dirname(__FILE__)+'/../data/test_binomial.csv'
|
7
|
-
@crime=File.dirname(__FILE__)+'/../data/crime.txt'
|
8
|
-
@cases=100
|
9
|
-
a=Array.new()
|
10
|
-
b=Array.new()
|
11
|
-
c=Array.new()
|
12
|
-
y=Array.new()
|
1
|
+
require(File.dirname(__FILE__)+'/test_helpers.rb')
|
13
2
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
c=c.to_vector(:scale)
|
24
|
-
y=y.to_vector(:scale)
|
25
|
-
|
26
|
-
@ds_indep={'a'=>a,'b'=>b,'c'=>c}.to_dataset
|
27
|
-
constant=([1.0]*@cases).to_vector(:scale)
|
28
|
-
@ds_indep_2={'constant'=>constant,'a'=>a,'b'=>b,'c'=>c}.to_dataset
|
29
|
-
@ds_indep_2.fields=%w{constant a b c}
|
30
|
-
@mat_x=@ds_indep_2.to_matrix
|
31
|
-
@mat_y=y.to_matrix(:vertical)
|
32
|
-
@ds=@ds_indep.dup
|
33
|
-
@ds.add_vector('y',y)
|
34
|
-
end
|
35
|
-
def test_normal
|
36
|
-
y=Array.new()
|
37
|
-
y=@ds_indep.collect{|row|
|
38
|
-
row['a']*5+row['b']+row['c']+rand()*3
|
39
|
-
}
|
40
|
-
constant=([1]*@cases).to_vector(:scale)
|
41
|
-
ds_indep_2=@ds_indep.dup
|
42
|
-
ds_indep_2['constant']=constant
|
43
|
-
ds_indep_2.fields=%w{constant a b c}
|
44
|
-
mat_x=ds_indep_2.to_matrix
|
45
|
-
mat_y=y.to_matrix(:vertical)
|
46
|
-
mle=Statsample::MLE::Normal.new()
|
47
|
-
mle.verbose=false
|
48
|
-
coeffs_nr=mle.newton_raphson(mat_x,mat_y)
|
49
|
-
#p coeffs_nr
|
50
|
-
ds=@ds_indep.dup
|
51
|
-
ds.add_vector('y',y)
|
52
|
-
lr=Statsample::Regression.multiple(ds,'y')
|
53
|
-
lr_constant = lr.constant
|
54
|
-
lr_coeffs = lr.coeffs
|
55
|
-
assert_in_delta(coeffs_nr[0,0], lr_constant,0.0000001)
|
56
|
-
assert_in_delta(coeffs_nr[1,0], lr_coeffs["a"],0.0000001)
|
57
|
-
assert_in_delta(coeffs_nr[2,0], lr_coeffs["b"],0.0000001)
|
58
|
-
assert_in_delta(coeffs_nr[3,0], lr_coeffs["c"],0.0000001)
|
59
|
-
end
|
60
|
-
|
61
|
-
def test_probit
|
62
|
-
ds=Statsample::CSV.read(@file_binomial)
|
63
|
-
constant=([1.0]*ds.cases).to_vector(:scale)
|
64
|
-
ds_indep={'constant'=>constant, 'a'=>ds['a'],'b'=>ds['b'], 'c'=>ds['c']}.to_dataset(%w{constant a b c})
|
65
|
-
mat_x=ds_indep.to_matrix
|
66
|
-
mat_y=ds['y'].to_matrix(:vertical)
|
67
|
-
mle=Statsample::MLE::Probit.new
|
68
|
-
b_probit=mle.newton_raphson(mat_x,mat_y)
|
69
|
-
ll=mle.log_likehood(mat_x,mat_y,b_probit)
|
70
|
-
|
71
|
-
b_exp=[-3.0670,0.1763,0.4483,-0.2240]
|
72
|
-
b_exp.each_index{|i|
|
73
|
-
assert_in_delta(b_exp[i], b_probit[i,0], 0.001)
|
74
|
-
}
|
75
|
-
assert_in_delta(-38.31559,ll,0.0001)
|
76
|
-
end
|
77
|
-
def test_logit_crime
|
78
|
-
ds=Statsample::PlainText.read(@crime, %w{crimerat maleteen south educ police60 police59 labor males pop nonwhite unemp1 unemp2 median belowmed})
|
79
|
-
constant=([1.0]*ds.cases).to_vector(:scale)
|
80
|
-
ds2=ds.dup(%w{maleteen south educ police59})
|
81
|
-
ds2['constant']=constant
|
82
|
-
ds2.fields=%w{constant maleteen south educ police59}
|
83
|
-
mat_x=ds2.to_matrix
|
84
|
-
mat_y=(ds.compute "(crimerat>=110) ? 1:0").to_matrix(:vertical)
|
85
|
-
mle=Statsample::MLE::Logit.new
|
86
|
-
b=mle.newton_raphson(mat_x,mat_y)
|
87
|
-
ll=mle.log_likehood(mat_x,mat_y,b)
|
88
|
-
assert_in_delta(-18.606959,ll,0.001)
|
89
|
-
exp=[-17.701, 0.0833,-1.117, 0.0229, 0.0581]
|
90
|
-
exp.each_index{|i|
|
91
|
-
assert_in_delta(exp[i],b[i,0],0.001)
|
92
|
-
}
|
93
|
-
assert_equal(5,mle.iterations)
|
94
|
-
end
|
95
|
-
def atest_logit_alglib
|
96
|
-
if(HAS_ALGIB)
|
97
|
-
ds=Statsample::CSV.read(@file_binomial)
|
98
|
-
constant=([1.0]*ds.cases).to_vector(:scale)
|
99
|
-
|
100
|
-
ds_indep={'constant'=>constant, 'a'=>ds['a'],'b'=>ds['b'], 'c'=>ds['c']}.to_dataset(%w{constant a b c} )
|
101
|
-
|
102
|
-
mat_x=ds_indep.to_matrix
|
103
|
-
mat_y=ds['y'].to_matrix(:vertical)
|
104
|
-
log=Alglib::Logit.build_from_matrix(ds.to_matrix)
|
105
|
-
coeffs=log.unpack[0]
|
106
|
-
b_alglib=Matrix.columns([[-coeffs[3], -coeffs[0], -coeffs[1], -coeffs[2]]])
|
107
|
-
mle=Statsample::MLE::Logit.new
|
108
|
-
ll_alglib=mle.log_likehood(mat_x,mat_y,b_alglib)
|
109
|
-
b_newton=mle.newton_raphson(mat_x,mat_y)
|
110
|
-
ll_pure_ruby=mle.log_likehood(mat_x,mat_y,b_newton)
|
111
|
-
#p b_alglib
|
112
|
-
#p b_newton
|
113
|
-
|
114
|
-
assert_in_delta(ll_alglib,ll_pure_ruby,1)
|
115
|
-
end
|
3
|
+
class StatsampleMLETestCase < MiniTest::Unit::TestCase
|
4
|
+
def setup
|
5
|
+
@file_binomial=File.dirname(__FILE__)+'/../data/test_binomial.csv'
|
6
|
+
@crime=File.dirname(__FILE__)+'/../data/crime.txt'
|
7
|
+
@cases=100
|
8
|
+
a=Array.new()
|
9
|
+
b=Array.new()
|
10
|
+
c=Array.new()
|
11
|
+
y=Array.new()
|
116
12
|
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
13
|
+
@cases.times{|i|
|
14
|
+
a.push(2*rand()-i)
|
15
|
+
b.push(2*rand()-5+i)
|
16
|
+
c.push(2*rand()+i)
|
17
|
+
y_val=i+(rand()*@cases.quo(2) - @cases.quo(4))
|
18
|
+
y.push(y_val<(@cases/2.0) ? 0.0 : 1.0)
|
19
|
+
}
|
20
|
+
a=a.to_vector(:scale)
|
21
|
+
b=b.to_vector(:scale)
|
22
|
+
c=c.to_vector(:scale)
|
23
|
+
y=y.to_vector(:scale)
|
24
|
+
|
25
|
+
@ds_indep={'a'=>a,'b'=>b,'c'=>c}.to_dataset
|
26
|
+
constant=([1.0]*@cases).to_vector(:scale)
|
27
|
+
@ds_indep_2={'constant'=>constant,'a'=>a,'b'=>b,'c'=>c}.to_dataset
|
28
|
+
@ds_indep_2.fields=%w{constant a b c}
|
29
|
+
@mat_x=@ds_indep_2.to_matrix
|
30
|
+
@mat_y=y.to_matrix(:vertical)
|
31
|
+
@ds=@ds_indep.dup
|
32
|
+
@ds.add_vector('y',y)
|
33
|
+
end
|
34
|
+
def test_normal
|
35
|
+
y=Array.new()
|
36
|
+
y=@ds_indep.collect{|row|
|
37
|
+
row['a']*5+row['b']+row['c']+rand()*3
|
38
|
+
}
|
39
|
+
constant=([1]*@cases).to_vector(:scale)
|
40
|
+
ds_indep_2=@ds_indep.dup
|
41
|
+
ds_indep_2['constant']=constant
|
42
|
+
ds_indep_2.fields=%w{constant a b c}
|
43
|
+
mat_x=ds_indep_2.to_matrix
|
44
|
+
mat_y=y.to_matrix(:vertical)
|
45
|
+
mle=Statsample::MLE::Normal.new()
|
46
|
+
mle.verbose=false
|
47
|
+
coeffs_nr=mle.newton_raphson(mat_x,mat_y)
|
48
|
+
#p coeffs_nr
|
49
|
+
ds=@ds_indep.dup
|
50
|
+
ds.add_vector('y',y)
|
51
|
+
lr=Statsample::Regression.multiple(ds,'y')
|
52
|
+
lr_constant = lr.constant
|
53
|
+
lr_coeffs = lr.coeffs
|
54
|
+
assert_in_delta(coeffs_nr[0,0], lr_constant,0.0000001)
|
55
|
+
assert_in_delta(coeffs_nr[1,0], lr_coeffs["a"],0.0000001)
|
56
|
+
assert_in_delta(coeffs_nr[2,0], lr_coeffs["b"],0.0000001)
|
57
|
+
assert_in_delta(coeffs_nr[3,0], lr_coeffs["c"],0.0000001)
|
58
|
+
end
|
128
59
|
|
60
|
+
def test_probit
|
61
|
+
ds=Statsample::CSV.read(@file_binomial)
|
62
|
+
constant=([1.0]*ds.cases).to_vector(:scale)
|
63
|
+
ds_indep={'constant'=>constant, 'a'=>ds['a'],'b'=>ds['b'], 'c'=>ds['c']}.to_dataset(%w{constant a b c})
|
64
|
+
mat_x=ds_indep.to_matrix
|
65
|
+
mat_y=ds['y'].to_matrix(:vertical)
|
66
|
+
mle=Statsample::MLE::Probit.new
|
67
|
+
b_probit=mle.newton_raphson(mat_x,mat_y)
|
68
|
+
ll=mle.log_likehood(mat_x,mat_y,b_probit)
|
129
69
|
|
70
|
+
b_exp=[-3.0670,0.1763,0.4483,-0.2240]
|
71
|
+
b_exp.each_index{|i|
|
72
|
+
assert_in_delta(b_exp[i], b_probit[i,0], 0.001)
|
73
|
+
}
|
74
|
+
assert_in_delta(-38.31559,ll,0.0001)
|
75
|
+
end
|
76
|
+
def test_logit_crime
|
77
|
+
ds=Statsample::PlainText.read(@crime, %w{crimerat maleteen south educ police60 police59 labor males pop nonwhite unemp1 unemp2 median belowmed})
|
78
|
+
constant=([1.0]*ds.cases).to_vector(:scale)
|
79
|
+
ds2=ds.dup(%w{maleteen south educ police59})
|
80
|
+
ds2['constant']=constant
|
81
|
+
ds2.fields=%w{constant maleteen south educ police59}
|
82
|
+
mat_x=ds2.to_matrix
|
83
|
+
mat_y=(ds.compute "(crimerat>=110) ? 1:0").to_matrix(:vertical)
|
84
|
+
mle=Statsample::MLE::Logit.new
|
85
|
+
b=mle.newton_raphson(mat_x,mat_y)
|
86
|
+
ll=mle.log_likehood(mat_x,mat_y,b)
|
87
|
+
assert_in_delta(-18.606959,ll,0.001)
|
88
|
+
exp=[-17.701, 0.0833,-1.117, 0.0229, 0.0581]
|
89
|
+
exp.each_index{|i|
|
90
|
+
assert_in_delta(exp[i],b[i,0],0.001)
|
91
|
+
}
|
92
|
+
assert_equal(5,mle.iterations)
|
93
|
+
end
|
94
|
+
def atest_logit_alglib
|
95
|
+
if(HAS_ALGIB)
|
96
|
+
ds=Statsample::CSV.read(@file_binomial)
|
97
|
+
constant=([1.0]*ds.cases).to_vector(:scale)
|
130
98
|
|
131
|
-
|
132
|
-
coeffs_nr=Statsample::MLE.newton_raphson(@mat_x,@mat_y, Statsample::MLE::Logit)
|
133
|
-
#p coeffs_nr
|
134
|
-
mle_pure_ruby=Statsample::MLE::ln_mle(Statsample::MLE::Logit, @mat_x,@mat_y,coeffs_nr)
|
135
|
-
#p mle_pure_ruby
|
99
|
+
ds_indep={'constant'=>constant, 'a'=>ds['a'],'b'=>ds['b'], 'c'=>ds['c']}.to_dataset(%w{constant a b c} )
|
136
100
|
|
137
|
-
|
101
|
+
mat_x=ds_indep.to_matrix
|
102
|
+
mat_y=ds['y'].to_matrix(:vertical)
|
103
|
+
log=Alglib::Logit.build_from_matrix(ds.to_matrix)
|
104
|
+
coeffs=log.unpack[0]
|
105
|
+
b_alglib=Matrix.columns([[-coeffs[3], -coeffs[0], -coeffs[1], -coeffs[2]]])
|
106
|
+
mle=Statsample::MLE::Logit.new
|
107
|
+
ll_alglib=mle.log_likehood(mat_x,mat_y,b_alglib)
|
108
|
+
b_newton=mle.newton_raphson(mat_x,mat_y)
|
109
|
+
ll_pure_ruby=mle.log_likehood(mat_x,mat_y,b_newton)
|
110
|
+
#p b_alglib
|
111
|
+
#p b_newton
|
112
|
+
|
113
|
+
assert_in_delta(ll_alglib,ll_pure_ruby,1)
|
138
114
|
end
|
115
|
+
|
116
|
+
end
|
117
|
+
def atest_logit1
|
118
|
+
log=Alglib::Logit.build_from_matrix(@ds.to_matrix)
|
119
|
+
coeffs=log.unpack[0]
|
120
|
+
b=Matrix.columns([[-coeffs[3],-coeffs[0],-coeffs[1],-coeffs[2]]])
|
121
|
+
# puts "Coeficientes beta alglib:"
|
122
|
+
#p b
|
123
|
+
mle_alglib=Statsample::MLE::ln_mle(Statsample::MLE::Logit, @mat_x,@mat_y,b)
|
124
|
+
# puts "MLE Alglib:"
|
125
|
+
#p mle_alglib
|
126
|
+
# Statsample::CSV.write(ds,"test_binomial.csv")
|
127
|
+
|
128
|
+
|
129
|
+
|
130
|
+
# puts "iniciando newton"
|
131
|
+
coeffs_nr=Statsample::MLE.newton_raphson(@mat_x,@mat_y, Statsample::MLE::Logit)
|
132
|
+
#p coeffs_nr
|
133
|
+
mle_pure_ruby=Statsample::MLE::ln_mle(Statsample::MLE::Logit, @mat_x,@mat_y,coeffs_nr)
|
134
|
+
#p mle_pure_ruby
|
135
|
+
|
136
|
+
#puts "Malo: #{mle_malo} Bueno: #{mle_bueno} : #{mle_malo-mle_bueno}"
|
137
|
+
end
|
139
138
|
end
|
140
139
|
|
data/test/test_multiset.rb
CHANGED
@@ -1,98 +1,97 @@
|
|
1
|
-
|
2
|
-
require 'statsample'
|
3
|
-
require 'test/unit'
|
1
|
+
require(File.dirname(__FILE__)+'/test_helpers.rb')
|
4
2
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
end
|
30
|
-
def test_creation_empty
|
31
|
-
ms=Statsample::Multiset.new_empty_vectors(%w{id age name},%w{male female})
|
32
|
-
ds_male={'id'=>[].to_vector,'age'=>[].to_vector, 'name'=>[].to_vector}.to_dataset(%w{id age name})
|
33
|
-
ds_female={'id'=>[].to_vector,'age'=>[].to_vector, 'name'=>[].to_vector}.to_dataset(%w{id age name})
|
34
|
-
ms2=Statsample::Multiset.new(%w{id age name})
|
35
|
-
ms2.add_dataset('male',ds_male)
|
36
|
-
ms2.add_dataset('female',ds_female)
|
37
|
-
assert_equal(ms2.fields,ms.fields)
|
38
|
-
assert_equal(ms2['male'],ms['male'])
|
39
|
-
assert_equal(ms2['female'],ms['female'])
|
40
|
-
end
|
41
|
-
def test_to_multiset_by_split_one
|
42
|
-
sex=%w{m m m m m f f f f m}.to_vector(:nominal)
|
43
|
-
city=%w{London Paris NY London Paris NY London Paris NY Tome}.to_vector(:nominal)
|
44
|
-
age=[10,10,20,30,34,34,33,35,36,40].to_vector(:scale)
|
45
|
-
ds={'sex'=>sex,'city'=>city,'age'=>age}.to_dataset
|
46
|
-
ms=ds.to_multiset_by_split('sex')
|
47
|
-
assert_equal(2,ms.n_datasets)
|
48
|
-
assert_equal(%w{f m},ms.datasets.keys.sort)
|
49
|
-
assert_equal(6,ms['m'].cases)
|
50
|
-
assert_equal(4,ms['f'].cases)
|
51
|
-
assert_equal(%w{London Paris NY London Paris Tome},ms['m']['city'].to_a)
|
52
|
-
assert_equal([34,33,35,36],ms['f']['age'].to_a)
|
53
|
-
end
|
54
|
-
def test_to_multiset_by_split_multiple
|
55
|
-
sex=%w{m m m m m m m m m m f f f f f f f f f f}.to_vector(:nominal)
|
56
|
-
city=%w{London London London Paris Paris London London London Paris Paris London London London Paris Paris London London London Paris Paris}.to_vector(:nominal)
|
57
|
-
hair=%w{blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black}.to_vector(:nominal)
|
58
|
-
age=[10,10,20,30,34,34,33,35,36,40, 10,10,20,30,34,34,33,35,36,40].to_vector(:scale)
|
59
|
-
ds={'sex'=>sex,'city'=>city,'hair'=>hair,'age'=>age}.to_dataset(%w{sex city hair age})
|
60
|
-
ms=ds.to_multiset_by_split('sex','city','hair')
|
61
|
-
assert_equal(8,ms.n_datasets)
|
62
|
-
assert_equal(3,ms[%w{m London blonde}].cases)
|
63
|
-
assert_equal(3,ms[%w{m London blonde}].cases)
|
64
|
-
assert_equal(1,ms[%w{m Paris black}].cases)
|
65
|
-
end
|
66
|
-
|
67
|
-
def test_stratum_proportion
|
68
|
-
ds1={'q1'=>[1,1,1,1,1,0,0,0,0,0,0,0].to_vector}.to_dataset
|
69
|
-
ds2={'q1'=>[1,1,1,1,1,1,1,0,0].to_vector}.to_dataset
|
70
|
-
assert_equal(5.0/12, ds1['q1'].proportion )
|
71
|
-
assert_equal(7.0/9, ds2['q1'].proportion )
|
72
|
-
ms=Statsample::Multiset.new(['q1'])
|
73
|
-
ms.add_dataset('d1',ds1)
|
74
|
-
ms.add_dataset('d2',ds2)
|
75
|
-
ss=Statsample::StratifiedSample.new(ms,{'d1'=>50,'d2'=>100})
|
76
|
-
assert_in_delta(0.655, ss.proportion('q1'),0.01)
|
77
|
-
assert_in_delta(0.345, ss.proportion('q1',0),0.01)
|
78
|
-
|
79
|
-
end
|
80
|
-
def test_stratum_scale
|
81
|
-
boys={'test'=>[50, 55, 60, 62, 62, 65, 67, 67, 70, 70, 73, 73, 75, 78, 78, 80, 85, 90].to_vector(:scale)}.to_dataset
|
82
|
-
girls={'test'=>[70, 70, 72, 72, 75, 75, 78, 78, 80, 80, 82, 82, 85, 85, 88, 88, 90, 90].to_vector(:scale)}.to_dataset
|
83
|
-
ms=Statsample::Multiset.new(['test'])
|
84
|
-
ms.add_dataset('boys',boys)
|
85
|
-
ms.add_dataset('girls',girls)
|
86
|
-
ss=Statsample::StratifiedSample.new(ms,{'boys'=>10000,'girls'=>10000})
|
87
|
-
assert_equal(2,ss.strata_number)
|
88
|
-
assert_equal(20000,ss.population_size)
|
89
|
-
assert_equal(10000,ss.stratum_size('boys'))
|
90
|
-
assert_equal(10000,ss.stratum_size('girls'))
|
91
|
-
assert_equal(36,ss.sample_size)
|
92
|
-
assert_equal(75,ss.mean('test'))
|
93
|
-
assert_in_delta(1.45,ss.standard_error_wor('test'),0.01)
|
94
|
-
assert_in_delta(ss.standard_error_wor('test'), ss.standard_error_wor_2('test'),0.00001)
|
95
|
-
|
96
|
-
|
3
|
+
|
4
|
+
class StatsampleMultisetTestCase < MiniTest::Unit::TestCase
|
5
|
+
def initialize(*args)
|
6
|
+
super
|
7
|
+
end
|
8
|
+
def test_creation
|
9
|
+
v1a=[1,2,3,4,5].to_vector
|
10
|
+
v2b=[11,21,31,41,51].to_vector
|
11
|
+
v3c=[21,23,34,45,56].to_vector
|
12
|
+
ds1={'v1'=>v1a,'v2'=>v2b,'v3'=>v3c}.to_dataset
|
13
|
+
v1b=[15,25,35,45,55].to_vector
|
14
|
+
v2b=[11,21,31,41,51].to_vector
|
15
|
+
v3b=[21,23,34,45,56].to_vector
|
16
|
+
ds2={'v1'=>v1b,'v2'=>v2b,'v3'=>v3b}.to_dataset
|
17
|
+
ms=Statsample::Multiset.new(['v1','v2','v3'])
|
18
|
+
ms.add_dataset('ds1',ds1)
|
19
|
+
ms.add_dataset('ds2',ds2)
|
20
|
+
assert_equal(ds1,ms['ds1'])
|
21
|
+
assert_equal(ds2,ms['ds2'])
|
22
|
+
assert_equal(v1a,ms['ds1']['v1'])
|
23
|
+
assert_not_equal(v1b,ms['ds1']['v1'])
|
24
|
+
ds3={'v1'=>v1b,'v2'=>v2b}.to_dataset
|
25
|
+
assert_raise ArgumentError do
|
26
|
+
ms.add_dataset(ds3)
|
97
27
|
end
|
98
|
-
end
|
28
|
+
end
|
29
|
+
def test_creation_empty
|
30
|
+
ms=Statsample::Multiset.new_empty_vectors(%w{id age name},%w{male female})
|
31
|
+
ds_male={'id'=>[].to_vector,'age'=>[].to_vector, 'name'=>[].to_vector}.to_dataset(%w{id age name})
|
32
|
+
ds_female={'id'=>[].to_vector,'age'=>[].to_vector, 'name'=>[].to_vector}.to_dataset(%w{id age name})
|
33
|
+
ms2=Statsample::Multiset.new(%w{id age name})
|
34
|
+
ms2.add_dataset('male',ds_male)
|
35
|
+
ms2.add_dataset('female',ds_female)
|
36
|
+
assert_equal(ms2.fields,ms.fields)
|
37
|
+
assert_equal(ms2['male'],ms['male'])
|
38
|
+
assert_equal(ms2['female'],ms['female'])
|
39
|
+
end
|
40
|
+
def test_to_multiset_by_split_one
|
41
|
+
sex=%w{m m m m m f f f f m}.to_vector(:nominal)
|
42
|
+
city=%w{London Paris NY London Paris NY London Paris NY Tome}.to_vector(:nominal)
|
43
|
+
age=[10,10,20,30,34,34,33,35,36,40].to_vector(:scale)
|
44
|
+
ds={'sex'=>sex,'city'=>city,'age'=>age}.to_dataset
|
45
|
+
ms=ds.to_multiset_by_split('sex')
|
46
|
+
assert_equal(2,ms.n_datasets)
|
47
|
+
assert_equal(%w{f m},ms.datasets.keys.sort)
|
48
|
+
assert_equal(6,ms['m'].cases)
|
49
|
+
assert_equal(4,ms['f'].cases)
|
50
|
+
assert_equal(%w{London Paris NY London Paris Tome},ms['m']['city'].to_a)
|
51
|
+
assert_equal([34,33,35,36],ms['f']['age'].to_a)
|
52
|
+
end
|
53
|
+
def test_to_multiset_by_split_multiple
|
54
|
+
sex=%w{m m m m m m m m m m f f f f f f f f f f}.to_vector(:nominal)
|
55
|
+
city=%w{London London London Paris Paris London London London Paris Paris London London London Paris Paris London London London Paris Paris}.to_vector(:nominal)
|
56
|
+
hair=%w{blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black}.to_vector(:nominal)
|
57
|
+
age=[10,10,20,30,34,34,33,35,36,40, 10,10,20,30,34,34,33,35,36,40].to_vector(:scale)
|
58
|
+
ds={'sex'=>sex,'city'=>city,'hair'=>hair,'age'=>age}.to_dataset(%w{sex city hair age})
|
59
|
+
ms=ds.to_multiset_by_split('sex','city','hair')
|
60
|
+
assert_equal(8,ms.n_datasets)
|
61
|
+
assert_equal(3,ms[%w{m London blonde}].cases)
|
62
|
+
assert_equal(3,ms[%w{m London blonde}].cases)
|
63
|
+
assert_equal(1,ms[%w{m Paris black}].cases)
|
64
|
+
end
|
65
|
+
|
66
|
+
def test_stratum_proportion
|
67
|
+
ds1={'q1'=>[1,1,1,1,1,0,0,0,0,0,0,0].to_vector}.to_dataset
|
68
|
+
ds2={'q1'=>[1,1,1,1,1,1,1,0,0].to_vector}.to_dataset
|
69
|
+
assert_equal(5.0/12, ds1['q1'].proportion )
|
70
|
+
assert_equal(7.0/9, ds2['q1'].proportion )
|
71
|
+
ms=Statsample::Multiset.new(['q1'])
|
72
|
+
ms.add_dataset('d1',ds1)
|
73
|
+
ms.add_dataset('d2',ds2)
|
74
|
+
ss=Statsample::StratifiedSample.new(ms,{'d1'=>50,'d2'=>100})
|
75
|
+
assert_in_delta(0.655, ss.proportion('q1'),0.01)
|
76
|
+
assert_in_delta(0.345, ss.proportion('q1',0),0.01)
|
77
|
+
|
78
|
+
end
|
79
|
+
def test_stratum_scale
|
80
|
+
boys={'test'=>[50, 55, 60, 62, 62, 65, 67, 67, 70, 70, 73, 73, 75, 78, 78, 80, 85, 90].to_vector(:scale)}.to_dataset
|
81
|
+
girls={'test'=>[70, 70, 72, 72, 75, 75, 78, 78, 80, 80, 82, 82, 85, 85, 88, 88, 90, 90].to_vector(:scale)}.to_dataset
|
82
|
+
ms=Statsample::Multiset.new(['test'])
|
83
|
+
ms.add_dataset('boys',boys)
|
84
|
+
ms.add_dataset('girls',girls)
|
85
|
+
ss=Statsample::StratifiedSample.new(ms,{'boys'=>10000,'girls'=>10000})
|
86
|
+
assert_equal(2,ss.strata_number)
|
87
|
+
assert_equal(20000,ss.population_size)
|
88
|
+
assert_equal(10000,ss.stratum_size('boys'))
|
89
|
+
assert_equal(10000,ss.stratum_size('girls'))
|
90
|
+
assert_equal(36,ss.sample_size)
|
91
|
+
assert_equal(75,ss.mean('test'))
|
92
|
+
assert_in_delta(1.45,ss.standard_error_wor('test'),0.01)
|
93
|
+
assert_in_delta(ss.standard_error_wor('test'), ss.standard_error_wor_2('test'),0.00001)
|
94
|
+
|
95
|
+
|
96
|
+
end
|
97
|
+
end
|