statsample 0.5.1 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +12 -0
- data/Manifest.txt +13 -0
- data/README.txt +2 -1
- data/demo/pca.rb +29 -0
- data/demo/umann.rb +8 -0
- data/lib/distribution.rb +0 -1
- data/lib/matrix_extension.rb +35 -21
- data/lib/statsample.rb +31 -28
- data/lib/statsample/anova.rb +7 -2
- data/lib/statsample/bivariate.rb +17 -11
- data/lib/statsample/codification.rb +136 -87
- data/lib/statsample/combination.rb +0 -2
- data/lib/statsample/converter/csv18.rb +1 -1
- data/lib/statsample/converter/csv19.rb +1 -1
- data/lib/statsample/converters.rb +176 -171
- data/lib/statsample/crosstab.rb +227 -154
- data/lib/statsample/dataset.rb +94 -12
- data/lib/statsample/dominanceanalysis.rb +69 -62
- data/lib/statsample/dominanceanalysis/bootstrap.rb +25 -21
- data/lib/statsample/factor.rb +18 -0
- data/lib/statsample/factor/pca.rb +128 -0
- data/lib/statsample/factor/principalaxis.rb +133 -0
- data/lib/statsample/factor/rotation.rb +125 -0
- data/lib/statsample/histogram.rb +99 -0
- data/lib/statsample/mle.rb +125 -126
- data/lib/statsample/mle/logit.rb +91 -91
- data/lib/statsample/mle/probit.rb +84 -85
- data/lib/statsample/multiset.rb +1 -1
- data/lib/statsample/permutation.rb +96 -0
- data/lib/statsample/regression.rb +1 -1
- data/lib/statsample/regression/binomial.rb +89 -89
- data/lib/statsample/regression/binomial/logit.rb +9 -9
- data/lib/statsample/regression/binomial/probit.rb +9 -9
- data/lib/statsample/regression/multiple.rb +8 -14
- data/lib/statsample/regression/multiple/gslengine.rb +1 -1
- data/lib/statsample/regression/multiple/rubyengine.rb +55 -55
- data/lib/statsample/resample.rb +12 -17
- data/lib/statsample/srs.rb +4 -1
- data/lib/statsample/test.rb +23 -22
- data/lib/statsample/test/umannwhitney.rb +182 -0
- data/lib/statsample/vector.rb +854 -815
- data/test/test_bivariate.rb +132 -132
- data/test/test_codification.rb +71 -50
- data/test/test_dataset.rb +19 -1
- data/test/test_factor.rb +44 -0
- data/test/test_histogram.rb +26 -0
- data/test/test_permutation.rb +37 -0
- data/test/test_statistics.rb +74 -63
- data/test/test_umannwhitney.rb +17 -0
- data/test/test_vector.rb +46 -30
- metadata +31 -4
data/test/test_bivariate.rb
CHANGED
@@ -3,144 +3,144 @@ require 'statsample'
|
|
3
3
|
require 'test/unit'
|
4
4
|
class StatsampleBivariateTestCase < Test::Unit::TestCase
|
5
5
|
def test_sum_of_codeviated
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
}
|
26
|
-
}
|
27
|
-
end
|
28
|
-
def test_tetrachoric
|
29
|
-
a,b,c,d=0,0,0,0
|
30
|
-
assert_raise RuntimeError do
|
31
|
-
tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
|
32
|
-
end
|
33
|
-
a,b,c,d=10,10,0,0
|
34
|
-
assert_raise RuntimeError do
|
35
|
-
tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
|
36
|
-
end
|
37
|
-
a,b,c,d=10,0,10,0
|
38
|
-
assert_raise RuntimeError do
|
39
|
-
tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
|
6
|
+
v1=[1,2,3,4,5,6].to_vector(:scale)
|
7
|
+
v2=[6,2,4,10,12,8].to_vector(:scale)
|
8
|
+
assert_equal(23.0, Statsample::Bivariate.sum_of_codeviated(v1,v2))
|
9
|
+
end
|
10
|
+
def test_pearson
|
11
|
+
v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
|
12
|
+
v2=[2,3,7,8,6,4,3,2].to_vector(:scale)
|
13
|
+
assert_in_delta(0.525,Statsample::Bivariate.pearson(v1,v2), 0.001)
|
14
|
+
v3=[6,2, 1000,1000,5,4,7,8,4,3,2,nil].to_vector(:scale)
|
15
|
+
v4=[2,nil,nil,nil, 3,7,8,6,4,3,2,500].to_vector(:scale)
|
16
|
+
assert_in_delta(0.525,Statsample::Bivariate.pearson(v3,v4),0.001)
|
17
|
+
end
|
18
|
+
def test_tetrachoric_matrix
|
19
|
+
ds=Statsample::PlainText.read(File.dirname(__FILE__)+"/../data/tetmat_test.txt", %w{a b c d e})
|
20
|
+
tcm_obs=Statsample::Bivariate.tetrachoric_correlation_matrix(ds)
|
21
|
+
tcm_exp=Statsample::PlainText.read(File.dirname(__FILE__)+"/../data/tetmat_matrix.txt", %w{a b c d e}).to_matrix
|
22
|
+
tcm_obs.row_size.times do |i|
|
23
|
+
tcm_obs.column_size do |j|
|
24
|
+
assert_in_delta(tcm_obs[i,j], tcm_exp[i,k], 0.00001)
|
40
25
|
end
|
41
|
-
|
26
|
+
end
|
27
|
+
end
|
28
|
+
def test_tetrachoric
|
29
|
+
a,b,c,d=0,0,0,0
|
30
|
+
assert_raise RuntimeError do
|
42
31
|
tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
|
43
|
-
|
44
|
-
|
45
|
-
|
32
|
+
end
|
33
|
+
a,b,c,d=10,10,0,0
|
34
|
+
assert_raise RuntimeError do
|
46
35
|
tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
a,b,c,d = 30,40,70,20
|
36
|
+
end
|
37
|
+
a,b,c,d=10,0,10,0
|
38
|
+
assert_raise RuntimeError do
|
51
39
|
tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
|
52
|
-
assert_in_delta(-0.53980,tc.r,0.0001)
|
53
|
-
assert_in_delta(0.09940,tc.se,0.0001)
|
54
|
-
assert_in_delta(0.31864,tc.threshold_x,0.0001)
|
55
|
-
assert_in_delta(-0.15731,tc.threshold_y,0.0001)
|
56
|
-
x=%w{a a a a b b b a b b a a b b}.to_vector
|
57
|
-
y=%w{0 0 1 1 0 0 1 1 1 1 0 0 1 1}.to_vector
|
58
|
-
# crosstab
|
59
|
-
# 0 1
|
60
|
-
# a 4 3
|
61
|
-
# b 2 5
|
62
|
-
a,b,c,d=4,3,2,5
|
63
|
-
tc1 = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
|
64
|
-
tc2 = Statsample::Bivariate::Tetrachoric.new_with_vectors(x,y)
|
65
|
-
assert_equal(tc1.r,tc2.r)
|
66
|
-
assert_equal(tc1.se,tc2.se)
|
67
|
-
|
68
40
|
end
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
#
|
41
|
+
a,b,c,d=10,0,0,10
|
42
|
+
tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
|
43
|
+
assert_equal(1,tc.r)
|
44
|
+
assert_equal(0,tc.se)
|
45
|
+
a,b,c,d=0,10,10,0
|
46
|
+
tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
|
47
|
+
assert_equal(-1,tc.r)
|
48
|
+
assert_equal(0,tc.se)
|
49
|
+
|
50
|
+
a,b,c,d = 30,40,70,20
|
51
|
+
tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
|
52
|
+
assert_in_delta(-0.53980,tc.r,0.0001)
|
53
|
+
assert_in_delta(0.09940,tc.se,0.0001)
|
54
|
+
assert_in_delta(0.31864,tc.threshold_x,0.0001)
|
55
|
+
assert_in_delta(-0.15731,tc.threshold_y,0.0001)
|
56
|
+
x=%w{a a a a b b b a b b a a b b}.to_vector
|
57
|
+
y=%w{0 0 1 1 0 0 1 1 1 1 0 0 1 1}.to_vector
|
58
|
+
# crosstab
|
59
|
+
# 0 1
|
60
|
+
# a 4 3
|
61
|
+
# b 2 5
|
62
|
+
a,b,c,d=4,3,2,5
|
63
|
+
tc1 = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
|
64
|
+
tc2 = Statsample::Bivariate::Tetrachoric.new_with_vectors(x,y)
|
65
|
+
assert_equal(tc1.r,tc2.r)
|
66
|
+
assert_equal(tc1.se,tc2.se)
|
67
|
+
|
68
|
+
end
|
69
|
+
def test_matrix_correlation
|
70
|
+
v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
|
71
|
+
v2=[2,3,7,8,6,4,3,2].to_vector(:scale)
|
72
|
+
v3=[6,2, 1000,1000,5,4,7,8].to_vector(:scale)
|
73
|
+
v4=[2,nil,nil,nil, 3,7,8,6].to_vector(:scale)
|
74
|
+
ds={'v1'=>v1,'v2'=>v2,'v3'=>v3,'v4'=>v4}.to_dataset
|
75
|
+
c=Proc.new {|n1,n2|Statsample::Bivariate.pearson(n1,n2)}
|
76
|
+
expected=Matrix[ [c.call(v1,v1),c.call(v1,v2),c.call(v1,v3),c.call(v1,v4)], [c.call(v2,v1),c.call(v2,v2),c.call(v2,v3),c.call(v2,v4)], [c.call(v3,v1),c.call(v3,v2),c.call(v3,v3),c.call(v3,v4)],
|
77
|
+
[c.call(v4,v1),c.call(v4,v2),c.call(v4,v3),c.call(v4,v4)]
|
78
|
+
]
|
79
|
+
obt=Statsample::Bivariate.correlation_matrix(ds)
|
80
|
+
for i in 0...expected.row_size
|
81
|
+
for j in 0...expected.column_size
|
82
|
+
#puts expected[i,j].inspect
|
83
|
+
#puts obt[i,j].inspect
|
84
|
+
assert_in_delta(expected[i,j], obt[i,j],0.0001,"#{expected[i,j].class}!=#{obt[i,j].class} ")
|
85
|
+
end
|
88
86
|
end
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
87
|
+
#assert_equal(expected,obt)
|
88
|
+
end
|
89
|
+
def test_prop_pearson
|
90
|
+
assert_in_delta(0.42, Statsample::Bivariate.prop_pearson(Statsample::Bivariate.t_r(0.084,94), 94),0.01)
|
91
|
+
assert_in_delta(0.65, Statsample::Bivariate.prop_pearson(Statsample::Bivariate.t_r(0.046,95), 95),0.01)
|
92
|
+
r=0.9
|
93
|
+
n=100
|
94
|
+
t=Statsample::Bivariate.t_r(r,n)
|
95
|
+
assert(Statsample::Bivariate.prop_pearson(t,n,:both)<0.05)
|
96
|
+
assert(Statsample::Bivariate.prop_pearson(t,n,:right)<0.05)
|
97
|
+
assert(Statsample::Bivariate.prop_pearson(t,n,:left)>0.05)
|
98
98
|
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
99
|
+
r=-0.9
|
100
|
+
n=100
|
101
|
+
t=Statsample::Bivariate.t_r(r,n)
|
102
|
+
assert(Statsample::Bivariate.prop_pearson(t,n,:both)<0.05)
|
103
|
+
assert(Statsample::Bivariate.prop_pearson(t,n,:right)>0.05)
|
104
|
+
assert(Statsample::Bivariate.prop_pearson(t,n,:left)<0.05)
|
105
|
+
end
|
106
|
+
def test_covariance
|
107
|
+
if HAS_GSL
|
108
|
+
v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
|
109
|
+
v2=[2,3,7,8,6,4,3,2].to_vector(:scale)
|
110
|
+
assert_in_delta(Statsample::Bivariate.covariance(v1,v2), Statsample::Bivariate.covariance_slow(v1,v2), 0.001)
|
111
|
+
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
def test_spearman
|
116
|
+
v1=[86,97,99,100,101,103,106,110,112,113].to_vector(:scale)
|
117
|
+
v2=[0,20,28,27,50,29,7,17,6,12].to_vector(:scale)
|
118
|
+
assert_in_delta(-0.175758,Statsample::Bivariate.spearman(v1,v2),0.0001)
|
119
|
+
|
120
|
+
end
|
121
|
+
def test_point_biserial
|
122
|
+
c=[1,3,5,6,7,100,200,300,400,300].to_vector(:scale)
|
123
|
+
d=[1,1,1,1,1,0,0,0,0,0].to_vector(:scale)
|
124
|
+
assert_raise TypeError do
|
125
|
+
Statsample::Bivariate.point_biserial(c,d)
|
105
126
|
end
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
Statsample::Bivariate.point_biserial(c,d)
|
126
|
-
end
|
127
|
-
assert_in_delta(Statsample::Bivariate.point_biserial(d,c), Statsample::Bivariate.pearson(d,c), 0.0001)
|
128
|
-
end
|
129
|
-
def test_tau
|
130
|
-
v1=[1,2,3,4,5,6,7,8,9,10,11].to_vector(:ordinal)
|
131
|
-
v2=[1,3,4,5,7,8,2,9,10,6,11].to_vector(:ordinal)
|
132
|
-
assert_in_delta(0.6727,Statsample::Bivariate.tau_a(v1,v2),0.001)
|
133
|
-
assert_in_delta(0.6727,Statsample::Bivariate.tau_b((Statsample::Crosstab.new(v1,v2).to_matrix)),0.001)
|
134
|
-
v1=[12,14,14,17,19,19,19,19,19,20,21,21,21,21,21,22,23,24,24,24,26,26,27].to_vector(:ordinal)
|
135
|
-
v2=[11,4,4,2,0,0,0,0,0,0,4,0,4,0,0,0,0,4,0,0,0,0,0].to_vector(:ordinal)
|
136
|
-
assert_in_delta(-0.376201540231705, Statsample::Bivariate.tau_b(Statsample::Crosstab.new(v1,v2).to_matrix),0.001)
|
137
|
-
end
|
138
|
-
def test_gamma
|
139
|
-
m=Matrix[[10,5,2],[10,15,20]]
|
140
|
-
assert_in_delta(0.636,Statsample::Bivariate.gamma(m),0.001)
|
141
|
-
m2=Matrix[[15,12,6,5],[12,8,10,8],[4,6,9,10]]
|
142
|
-
assert_in_delta(0.349,Statsample::Bivariate.gamma(m2),0.001)
|
143
|
-
|
144
|
-
|
145
|
-
end
|
127
|
+
assert_in_delta(Statsample::Bivariate.point_biserial(d,c), Statsample::Bivariate.pearson(d,c), 0.0001)
|
128
|
+
end
|
129
|
+
def test_tau
|
130
|
+
v1=[1,2,3,4,5,6,7,8,9,10,11].to_vector(:ordinal)
|
131
|
+
v2=[1,3,4,5,7,8,2,9,10,6,11].to_vector(:ordinal)
|
132
|
+
assert_in_delta(0.6727,Statsample::Bivariate.tau_a(v1,v2),0.001)
|
133
|
+
assert_in_delta(0.6727,Statsample::Bivariate.tau_b((Statsample::Crosstab.new(v1,v2).to_matrix)),0.001)
|
134
|
+
v1=[12,14,14,17,19,19,19,19,19,20,21,21,21,21,21,22,23,24,24,24,26,26,27].to_vector(:ordinal)
|
135
|
+
v2=[11,4,4,2,0,0,0,0,0,0,4,0,4,0,0,0,0,4,0,0,0,0,0].to_vector(:ordinal)
|
136
|
+
assert_in_delta(-0.376201540231705, Statsample::Bivariate.tau_b(Statsample::Crosstab.new(v1,v2).to_matrix),0.001)
|
137
|
+
end
|
138
|
+
def test_gamma
|
139
|
+
m=Matrix[[10,5,2],[10,15,20]]
|
140
|
+
assert_in_delta(0.636,Statsample::Bivariate.gamma(m),0.001)
|
141
|
+
m2=Matrix[[15,12,6,5],[12,8,10,8],[4,6,9,10]]
|
142
|
+
assert_in_delta(0.349,Statsample::Bivariate.gamma(m2),0.001)
|
143
|
+
|
144
|
+
|
145
|
+
end
|
146
146
|
end
|
data/test/test_codification.rb
CHANGED
@@ -1,60 +1,81 @@
|
|
1
1
|
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
2
2
|
require 'statsample'
|
3
3
|
require 'tempfile'
|
4
|
+
require 'tmpdir'
|
4
5
|
require 'test/unit'
|
5
6
|
|
6
7
|
class StatsampleCodificationTestCase < Test::Unit::TestCase
|
7
8
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
def test_recode_dataset_simple
|
40
|
-
yaml=YAML::dump({'v1'=>@dict})
|
41
|
-
Statsample::Codification.recode_dataset_simple!(@ds,yaml)
|
42
|
-
expected_vector=['r','w,r','w','r','s','s,d', 's,d'].to_vector
|
43
|
-
assert_not_equal(expected_vector,@ds['v1'])
|
44
|
-
assert_equal(expected_vector,@ds['v1_recoded'])
|
45
|
-
end
|
46
|
-
def test_recode_dataset_split
|
47
|
-
yaml=YAML::dump({'v1'=>@dict})
|
48
|
-
Statsample::Codification.recode_dataset_split!(@ds,yaml)
|
49
|
-
e={}
|
50
|
-
e['r']=[1,1,0,1,0,0,0].to_vector
|
51
|
-
e['w']=[0,1,1,0,0,0,0].to_vector
|
52
|
-
e['s']=[0,0,0,0,1,1,1].to_vector
|
53
|
-
e['d']=[0,0,0,0,0,1,1].to_vector
|
54
|
-
e.each{|k,expected|
|
55
|
-
assert_equal(expected,@ds['v1_'+k],"Error on key #{k}")
|
56
|
-
|
57
|
-
}
|
9
|
+
def initialize(*args)
|
10
|
+
v1=%w{run walk,run walking running sleep sleeping,dreaming sleep,dream}.to_vector
|
11
|
+
@dict={'run'=>'r','walk'=>'w','walking'=>'w','running'=>'r','sleep'=>'s', 'sleeping'=>'s', 'dream'=>'d', 'dreaming'=>'d'}
|
12
|
+
@ds={"v1"=>v1}.to_dataset
|
13
|
+
super
|
14
|
+
end
|
15
|
+
def test_create_hash
|
16
|
+
expected_keys_v1=%w{run walk walking running sleep sleeping dream dreaming}.sort
|
17
|
+
hash=Statsample::Codification.create_hash(@ds,['v1'])
|
18
|
+
assert_equal(['v1'],hash.keys)
|
19
|
+
assert_equal(expected_keys_v1,hash['v1'].keys.sort)
|
20
|
+
assert_equal(expected_keys_v1,hash['v1'].values.sort)
|
21
|
+
end
|
22
|
+
def test_create_excel
|
23
|
+
filename=Dir::tmpdir+"/test_excel"+Time.now().to_s+".xls"
|
24
|
+
#filename = Tempfile.new("test_codification_"+Time.now().to_s)
|
25
|
+
Statsample::Codification.create_excel(@ds, ['v1'], filename)
|
26
|
+
field=(["v1"]*8).to_vector
|
27
|
+
keys=%w{dream dreaming run running sleep sleeping walk walking}.to_vector
|
28
|
+
ds=Statsample::Excel.read(filename)
|
29
|
+
assert_equal(field, ds['field'])
|
30
|
+
assert_equal(keys, ds['original'])
|
31
|
+
assert_equal(keys, ds['recoded'])
|
32
|
+
hash=Statsample::Codification.excel_to_recoded_hash(filename)
|
33
|
+
assert_equal(keys.data, hash['v1'].keys.sort)
|
34
|
+
assert_equal(keys.data, hash['v1'].values.sort)
|
35
|
+
|
36
|
+
end
|
37
|
+
def test_create_yaml
|
38
|
+
assert_raise ArgumentError do
|
39
|
+
Statsample::Codification.create_yaml(@ds,[])
|
58
40
|
end
|
41
|
+
expected_keys_v1=%w{run walk walking running sleep sleeping dream dreaming}.sort
|
42
|
+
yaml_hash=Statsample::Codification.create_yaml(@ds,['v1'])
|
43
|
+
h=YAML::load(yaml_hash)
|
44
|
+
assert_equal(['v1'],h.keys)
|
45
|
+
assert_equal(expected_keys_v1,h['v1'].keys.sort)
|
46
|
+
tf = Tempfile.new("test_codification")
|
47
|
+
yaml_hash=Statsample::Codification.create_yaml(@ds,['v1'],tf, Statsample::SPLIT_TOKEN)
|
48
|
+
tf.close
|
49
|
+
tf.open
|
50
|
+
h=YAML::load(tf)
|
51
|
+
assert_equal(['v1'],h.keys)
|
52
|
+
assert_equal(expected_keys_v1,h['v1'].keys.sort)
|
53
|
+
tf.close(true)
|
54
|
+
end
|
55
|
+
def test_recodification
|
56
|
+
expected=[['r'],['w','r'],['w'],['r'],['s'],['s','d'], ['s','d']]
|
57
|
+
assert_equal(expected,Statsample::Codification.recode_vector(@ds['v1'],@dict))
|
58
|
+
v2=['run','walk,dreaming',nil,'walk,dream,dreaming,walking'].to_vector
|
59
|
+
expected=[['r'],['w','d'],nil,['w','d']]
|
60
|
+
assert_equal(expected,Statsample::Codification.recode_vector(v2,@dict))
|
61
|
+
end
|
62
|
+
def test_recode_dataset_simple
|
63
|
+
Statsample::Codification.recode_dataset_simple!(@ds,{'v1'=>@dict})
|
64
|
+
expected_vector=['r','w,r','w','r','s','s,d', 's,d'].to_vector
|
65
|
+
assert_not_equal(expected_vector,@ds['v1'])
|
66
|
+
assert_equal(expected_vector,@ds['v1_recoded'])
|
67
|
+
end
|
68
|
+
def test_recode_dataset_split
|
69
|
+
Statsample::Codification.recode_dataset_split!(@ds,{'v1'=>@dict})
|
70
|
+
e={}
|
71
|
+
e['r']=[1,1,0,1,0,0,0].to_vector
|
72
|
+
e['w']=[0,1,1,0,0,0,0].to_vector
|
73
|
+
e['s']=[0,0,0,0,1,1,1].to_vector
|
74
|
+
e['d']=[0,0,0,0,0,1,1].to_vector
|
75
|
+
e.each{|k,expected|
|
76
|
+
assert_equal(expected,@ds['v1_'+k],"Error on key #{k}")
|
77
|
+
|
78
|
+
}
|
79
|
+
end
|
59
80
|
|
60
81
|
end
|
data/test/test_dataset.rb
CHANGED
@@ -356,7 +356,7 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
|
|
356
356
|
assert_equal(vmult,ds.compute("v1*v2"))
|
357
357
|
|
358
358
|
end
|
359
|
-
|
359
|
+
def test_crosstab_with_asignation
|
360
360
|
v1=%w{a a a b b b c c c}.to_vector
|
361
361
|
v2=%w{a b c a b c a b c}.to_vector
|
362
362
|
v3=%w{0 1 0 0 1 1 0 0 1}.to_scale
|
@@ -371,4 +371,22 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
|
|
371
371
|
ds2={'_id'=>ev_id, 'a'=>ev_a, 'b'=>ev_b, 'c'=>ev_c}.to_dataset
|
372
372
|
assert_equal(ds, ds2)
|
373
373
|
end
|
374
|
+
def test_one_to_many
|
375
|
+
cases=[
|
376
|
+
['1','george','red',10,'blue',20,nil,nil],
|
377
|
+
['2','fred','green',15,'orange',30,'white',20],
|
378
|
+
['3','alfred',nil,nil,nil,nil,nil,nil]
|
379
|
+
]
|
380
|
+
ds=Statsample::Dataset.new(%w{id name car_color1 car_value1 car_color2 car_value2 car_color3 car_value3})
|
381
|
+
cases.each {|c| ds.add_case_array c }
|
382
|
+
ds.update_valid_data
|
383
|
+
ids=%w{1 1 2 2 2}.to_vector
|
384
|
+
colors=%w{red blue green orange white}.to_vector
|
385
|
+
values=[10,20,15,30,20].to_vector
|
386
|
+
col_ids=[1,2,1,2,3].to_scale
|
387
|
+
ds_expected={'id'=>ids, '_col_id'=>col_ids, 'color'=>colors, 'value'=>values}.to_dataset(['id','_col_id', 'color','value'])
|
388
|
+
assert_equal(ds_expected, ds.one_to_many(%w{id}, "car_%v%n"))
|
389
|
+
|
390
|
+
end
|
391
|
+
|
374
392
|
end
|
data/test/test_factor.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
2
|
+
require 'statsample'
|
3
|
+
require 'test/unit'
|
4
|
+
require 'matrix_extension'
|
5
|
+
class StatsampleFactorTestCase < Test::Unit::TestCase
|
6
|
+
def test_pca
|
7
|
+
require 'gsl'
|
8
|
+
a=[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_scale
|
9
|
+
b=[2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9].to_scale
|
10
|
+
a.recode! {|c| c-a.mean}
|
11
|
+
b.recode! {|c| c-b.mean}
|
12
|
+
ds={'a'=>a,'b'=>b}.to_dataset
|
13
|
+
cov_matrix=Statsample::Bivariate.covariance_matrix(ds)
|
14
|
+
pca=Statsample::Factor::PCA.new(cov_matrix)
|
15
|
+
expected_eigenvalues=[1.284, 0.0490]
|
16
|
+
expected_eigenvalues.each_with_index{|ev,i|
|
17
|
+
assert_in_delta(ev,pca.eigenvalues[i],0.001)
|
18
|
+
}
|
19
|
+
expected_fm_1=GSL::Matrix[[0.677], [0.735]]
|
20
|
+
expected_fm_2=GSL::Matrix[[0.677,0.735], [0.735, -0.677]]
|
21
|
+
_test_matrix(expected_fm_1,pca.feature_vector(1))
|
22
|
+
_test_matrix(expected_fm_2,pca.feature_vector(2))
|
23
|
+
end
|
24
|
+
def test_rotation_varimax
|
25
|
+
a = Matrix[ [ 0.4320, 0.8129, 0.3872] ,
|
26
|
+
[0.7950, -0.5416, 0.2565] ,
|
27
|
+
[0.5944, 0.7234, -0.3441],
|
28
|
+
[0.8945, -0.3921, -0.1863] ]
|
29
|
+
expected= Matrix[[-0.0204423, 0.938674, -0.340334],
|
30
|
+
[0.983662, 0.0730206, 0.134997],
|
31
|
+
[0.0826106, 0.435975, -0.893379],
|
32
|
+
[0.939901, -0.0965213, -0.309596]].to_gsl
|
33
|
+
varimax=Statsample::Factor::Varimax.new(a)
|
34
|
+
varimax.iterate
|
35
|
+
_test_matrix(expected,varimax.rotated)
|
36
|
+
end
|
37
|
+
def _test_matrix(a,b)
|
38
|
+
a.size1.times {|i|
|
39
|
+
a.size2.times {|j|
|
40
|
+
assert_in_delta(a[i,j], b[i,j],0.001)
|
41
|
+
}
|
42
|
+
}
|
43
|
+
end
|
44
|
+
end
|