statsample 0.6.3 → 0.6.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +6 -0
- data/Manifest.txt +4 -0
- data/README.txt +5 -5
- data/demo/dominance_analysis_bootstrap.rb +9 -3
- data/demo/dominanceanalysis.rb +23 -7
- data/demo/multivariate_correlation.rb +26 -0
- data/lib/statsample.rb +1 -1
- data/lib/statsample/bivariate.rb +24 -4
- data/lib/statsample/bivariate/polychoric.rb +15 -14
- data/lib/statsample/converters.rb +27 -23
- data/lib/statsample/crosstab.rb +1 -44
- data/lib/statsample/dominanceanalysis.rb +158 -64
- data/lib/statsample/dominanceanalysis/bootstrap.rb +16 -7
- data/lib/statsample/matrix.rb +145 -13
- data/lib/statsample/multiset.rb +248 -265
- data/lib/statsample/regression.rb +3 -0
- data/lib/statsample/regression/multiple.rb +65 -23
- data/lib/statsample/regression/multiple/baseengine.rb +19 -20
- data/lib/statsample/regression/multiple/matrixengine.rb +187 -0
- data/lib/statsample/regression/multiple/rubyengine.rb +58 -98
- data/test/test_bivariate.rb +1 -0
- data/test/test_crosstab.rb +0 -3
- data/test/test_dataset.rb +379 -379
- data/test/test_dominance_analysis.rb +43 -0
- data/test/test_matrix.rb +52 -0
- data/test/test_regression.rb +174 -129
- data/test/test_svg_graph.rb +51 -51
- metadata +29 -3
data/test/test_bivariate.rb
CHANGED
data/test/test_crosstab.rb
CHANGED
data/test/test_dataset.rb
CHANGED
@@ -3,390 +3,390 @@ require 'statsample'
|
|
3
3
|
require 'test/unit'
|
4
4
|
require 'tmpdir'
|
5
5
|
class StatsampleDatasetTestCase < Test::Unit::TestCase
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
6
|
+
def setup
|
7
|
+
@ds=Statsample::Dataset.new({'id' => Statsample::Vector.new([1,2,3,4,5]), 'name'=>Statsample::Vector.new(%w{Alex Claude Peter Franz George}), 'age'=>Statsample::Vector.new([20,23,25,27,5]),
|
8
|
+
'city'=>Statsample::Vector.new(['New York','London','London','Paris','Tome']),
|
9
|
+
'a1'=>Statsample::Vector.new(['a,b','b,c','a',nil,'a,b,c'])}, ['id','name','age','city','a1'])
|
10
|
+
end
|
11
|
+
def test_basic
|
12
|
+
assert_equal(5,@ds.cases)
|
13
|
+
assert_equal(%w{id name age city a1}, @ds.fields)
|
14
|
+
end
|
15
|
+
def test_saveload
|
16
|
+
outfile=Dir::tmpdir+"/dataset.ds"
|
17
|
+
@ds.save(outfile)
|
18
|
+
a=Statsample.load(outfile)
|
19
|
+
assert_equal(@ds,a)
|
20
|
+
end
|
21
|
+
|
22
|
+
def test_matrix
|
23
|
+
matrix=Matrix[[1,2],[3,4],[5,6]]
|
24
|
+
ds=Statsample::Dataset.new('v1'=>[1,3,5].to_vector,'v2'=>[2,4,6].to_vector)
|
25
|
+
assert_equal(matrix,ds.to_matrix)
|
26
|
+
end
|
27
|
+
|
28
|
+
def test_fields
|
29
|
+
@ds.fields=%w{name a1 id age city}
|
30
|
+
assert_equal(%w{name a1 id age city}, @ds.fields)
|
31
|
+
@ds.fields=%w{id name age}
|
32
|
+
assert_equal(%w{id name age a1 city}, @ds.fields)
|
33
|
+
end
|
34
|
+
def test_merge
|
35
|
+
a=[1,2,3].to_scale
|
36
|
+
b=[3,4,5].to_vector
|
37
|
+
c=[4,5,6].to_scale
|
38
|
+
d=[7,8,9].to_vector
|
39
|
+
e=[10,20,30].to_vector
|
40
|
+
ds1={'a'=>a,'b'=>b}.to_dataset
|
41
|
+
ds2={'c'=>c,'d'=>d}.to_dataset
|
42
|
+
exp={'a'=>a,'b'=>b,'c'=>c,'d'=>d}.to_dataset
|
43
|
+
|
44
|
+
assert_equal(exp,ds1.merge(ds2))
|
45
|
+
exp.fields=%w{c d a b}
|
46
|
+
assert_equal(exp,ds2.merge(ds1))
|
47
|
+
ds3={'a'=>e}.to_dataset
|
48
|
+
exp={'a_1'=>a,'b'=>b,'a_2'=>e}.to_dataset
|
49
|
+
exp.fields=%w{a_1 b a_2}
|
50
|
+
assert_equal(exp,ds1.merge(ds3))
|
51
|
+
end
|
52
|
+
def test_each_vector
|
53
|
+
a=[1,2,3].to_vector
|
54
|
+
b=[3,4,5].to_vector
|
55
|
+
fields=["a","b"]
|
56
|
+
ds=Statsample::Dataset.new({'a'=>a,'b'=>b},fields)
|
57
|
+
res=[]
|
58
|
+
ds.each_vector{|k,v|
|
59
|
+
res.push([k,v])
|
60
|
+
}
|
61
|
+
assert_equal([["a",a],["b",b]],res)
|
62
|
+
ds.fields=["b","a"]
|
63
|
+
res=[]
|
64
|
+
ds.each_vector{|k,v|
|
65
|
+
res.push([k,v])
|
66
|
+
}
|
67
|
+
assert_equal([["b",b],["a",a]],res)
|
68
|
+
end
|
69
|
+
def test_equality
|
70
|
+
v1=[1,2,3,4].to_vector
|
71
|
+
v2=[5,6,7,8].to_vector
|
72
|
+
ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2}, %w{v2 v1})
|
73
|
+
v3=[1,2,3,4].to_vector
|
74
|
+
v4=[5,6,7,8].to_vector
|
75
|
+
ds2=Statsample::Dataset.new({'v1'=>v3,'v2'=>v4}, %w{v2 v1})
|
76
|
+
assert_equal(ds1,ds2)
|
77
|
+
ds2.fields=%w{v1 v2}
|
78
|
+
assert_not_equal(ds1,ds2)
|
79
|
+
end
|
80
|
+
def test_add_vector
|
81
|
+
v=Statsample::Vector.new(%w{a b c d e})
|
82
|
+
@ds.add_vector('new',v)
|
83
|
+
assert_equal(%w{id name age city a1 new},@ds.fields)
|
84
|
+
x=Statsample::Vector.new(%w{a b c d e f g})
|
85
|
+
assert_raise ArgumentError do
|
86
|
+
@ds.add_vector('new2',x)
|
26
87
|
end
|
88
|
+
end
|
89
|
+
def test_vector_by_calculation
|
90
|
+
a1=[1,2,3,4,5,6,7].to_vector(:scale)
|
91
|
+
a2=[10,20,30,40,50,60,70].to_vector(:scale)
|
92
|
+
a3=[100,200,300,400,500,600,700].to_vector(:scale)
|
93
|
+
ds={'a1'=>a1,'a2'=>a2,'a3'=>a3}.to_dataset
|
94
|
+
total=ds.vector_by_calculation() {|row|
|
95
|
+
row['a1']+row['a2']+row['a3']
|
96
|
+
}
|
97
|
+
expected=[111,222,333,444,555,666,777].to_vector(:scale)
|
98
|
+
assert_equal(expected,total)
|
99
|
+
end
|
100
|
+
def test_vector_sum
|
101
|
+
a1=[1 ,2 ,3 ,4 , 5,nil].to_vector(:scale)
|
102
|
+
a2=[10 ,10,20,20 ,20,30].to_vector(:scale)
|
103
|
+
b1=[nil,1 ,1 ,1 ,1 ,2].to_vector(:scale)
|
104
|
+
b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
|
105
|
+
ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2}.to_dataset
|
106
|
+
total=ds.vector_sum
|
107
|
+
a=ds.vector_sum(['a1','a2'])
|
108
|
+
b=ds.vector_sum(['b1','b2'])
|
109
|
+
expected_a=[11,12,23,24,25,nil].to_vector(:scale)
|
110
|
+
expected_b=[nil,3,3,nil,3,5].to_vector(:scale)
|
111
|
+
expected_total=[nil,15,26,nil,28,nil].to_vector(:scale)
|
112
|
+
assert_equal(expected_a, a)
|
113
|
+
assert_equal(expected_b, b)
|
114
|
+
assert_equal(expected_total, total)
|
115
|
+
end
|
116
|
+
def test_vector_missing_values
|
117
|
+
a1=[1 ,nil ,3 ,4 , 5,nil].to_vector(:scale)
|
118
|
+
a2=[10 ,nil ,20,20 ,20,30].to_vector(:scale)
|
119
|
+
b1=[nil,nil ,1 ,1 ,1 ,2].to_vector(:scale)
|
120
|
+
b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
|
121
|
+
c= [nil,2 , 4,2 ,2 ,2].to_vector(:scale)
|
122
|
+
ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
|
123
|
+
mva=[2,3,0,1,0,1].to_vector(:scale)
|
124
|
+
assert_equal(mva,ds.vector_missing_values)
|
125
|
+
end
|
126
|
+
def test_vector_count_characters
|
127
|
+
a1=[1 ,"abcde" ,3 ,4 , 5,nil].to_vector(:scale)
|
128
|
+
a2=[10 ,20.3 ,20 ,20 ,20,30].to_vector(:scale)
|
129
|
+
b1=[nil,"343434" ,1 ,1 ,1 ,2].to_vector(:scale)
|
130
|
+
b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
|
131
|
+
c= [nil,2 ,"This is a nice example",2 ,2 ,2].to_vector(:scale)
|
132
|
+
ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
|
133
|
+
exp=[4,17,27,5,6,5].to_vector(:scale)
|
134
|
+
assert_equal(exp,ds.vector_count_characters)
|
27
135
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
v2=[5,6,7,8].to_vector
|
72
|
-
ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2}, %w{v2 v1})
|
73
|
-
v3=[1,2,3,4].to_vector
|
74
|
-
v4=[5,6,7,8].to_vector
|
75
|
-
ds2=Statsample::Dataset.new({'v1'=>v3,'v2'=>v4}, %w{v2 v1})
|
76
|
-
assert_equal(ds1,ds2)
|
77
|
-
ds2.fields=%w{v1 v2}
|
78
|
-
assert_not_equal(ds1,ds2)
|
79
|
-
end
|
80
|
-
def test_add_vector
|
81
|
-
v=Statsample::Vector.new(%w{a b c d e})
|
82
|
-
@ds.add_vector('new',v)
|
83
|
-
assert_equal(%w{id name age city a1 new},@ds.fields)
|
84
|
-
x=Statsample::Vector.new(%w{a b c d e f g})
|
85
|
-
assert_raise ArgumentError do
|
86
|
-
@ds.add_vector('new2',x)
|
87
|
-
end
|
88
|
-
end
|
89
|
-
def test_vector_by_calculation
|
90
|
-
a1=[1,2,3,4,5,6,7].to_vector(:scale)
|
91
|
-
a2=[10,20,30,40,50,60,70].to_vector(:scale)
|
92
|
-
a3=[100,200,300,400,500,600,700].to_vector(:scale)
|
93
|
-
ds={'a1'=>a1,'a2'=>a2,'a3'=>a3}.to_dataset
|
94
|
-
total=ds.vector_by_calculation() {|row|
|
95
|
-
row['a1']+row['a2']+row['a3']
|
96
|
-
}
|
97
|
-
expected=[111,222,333,444,555,666,777].to_vector(:scale)
|
98
|
-
assert_equal(expected,total)
|
99
|
-
end
|
100
|
-
def test_vector_sum
|
101
|
-
a1=[1 ,2 ,3 ,4 , 5,nil].to_vector(:scale)
|
102
|
-
a2=[10 ,10,20,20 ,20,30].to_vector(:scale)
|
103
|
-
b1=[nil,1 ,1 ,1 ,1 ,2].to_vector(:scale)
|
104
|
-
b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
|
105
|
-
ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2}.to_dataset
|
106
|
-
total=ds.vector_sum
|
107
|
-
a=ds.vector_sum(['a1','a2'])
|
108
|
-
b=ds.vector_sum(['b1','b2'])
|
109
|
-
expected_a=[11,12,23,24,25,nil].to_vector(:scale)
|
110
|
-
expected_b=[nil,3,3,nil,3,5].to_vector(:scale)
|
111
|
-
expected_total=[nil,15,26,nil,28,nil].to_vector(:scale)
|
112
|
-
assert_equal(expected_a, a)
|
113
|
-
assert_equal(expected_b, b)
|
114
|
-
assert_equal(expected_total, total)
|
115
|
-
end
|
116
|
-
def test_vector_missing_values
|
117
|
-
a1=[1 ,nil ,3 ,4 , 5,nil].to_vector(:scale)
|
118
|
-
a2=[10 ,nil ,20,20 ,20,30].to_vector(:scale)
|
119
|
-
b1=[nil,nil ,1 ,1 ,1 ,2].to_vector(:scale)
|
120
|
-
b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
|
121
|
-
c= [nil,2 , 4,2 ,2 ,2].to_vector(:scale)
|
122
|
-
ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
|
123
|
-
mva=[2,3,0,1,0,1].to_vector(:scale)
|
124
|
-
assert_equal(mva,ds.vector_missing_values)
|
125
|
-
end
|
126
|
-
def test_vector_count_characters
|
127
|
-
a1=[1 ,"abcde" ,3 ,4 , 5,nil].to_vector(:scale)
|
128
|
-
a2=[10 ,20.3 ,20 ,20 ,20,30].to_vector(:scale)
|
129
|
-
b1=[nil,"343434" ,1 ,1 ,1 ,2].to_vector(:scale)
|
130
|
-
b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
|
131
|
-
c= [nil,2 ,"This is a nice example",2 ,2 ,2].to_vector(:scale)
|
132
|
-
ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
|
133
|
-
exp=[4,17,27,5,6,5].to_vector(:scale)
|
134
|
-
assert_equal(exp,ds.vector_count_characters)
|
135
|
-
|
136
|
-
end
|
137
|
-
def test_vector_mean
|
138
|
-
a1=[1 ,2 ,3 ,4 , 5,nil].to_vector(:scale)
|
139
|
-
a2=[10 ,10,20,20 ,20,30].to_vector(:scale)
|
140
|
-
b1=[nil,1 ,1 ,1 ,1 ,2].to_vector(:scale)
|
141
|
-
b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
|
142
|
-
c= [nil,2, 4,2 ,2 ,2].to_vector(:scale)
|
143
|
-
ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
|
144
|
-
total=ds.vector_mean
|
145
|
-
a=ds.vector_mean(['a1','a2'],1)
|
146
|
-
b=ds.vector_mean(['b1','b2'],1)
|
147
|
-
c=ds.vector_mean(['b1','b2','c'],1)
|
148
|
-
expected_a=[5.5,6,11.5,12,12.5,30].to_vector(:scale)
|
149
|
-
expected_b=[2,1.5,1.5,1,1.5,2.5].to_vector(:scale)
|
150
|
-
expected_c=[nil, 5.0/3,7.0/3,1.5,5.0/3,7.0/3].to_vector(:scale)
|
151
|
-
expected_total=[nil,3.4,6,nil,6.0,nil].to_vector(:scale)
|
152
|
-
assert_equal(expected_a, a)
|
153
|
-
assert_equal(expected_b, b)
|
154
|
-
assert_equal(expected_c, c)
|
155
|
-
assert_equal(expected_total, total)
|
156
|
-
end
|
136
|
+
end
|
137
|
+
def test_vector_mean
|
138
|
+
a1=[1 ,2 ,3 ,4 , 5,nil].to_vector(:scale)
|
139
|
+
a2=[10 ,10,20,20 ,20,30].to_vector(:scale)
|
140
|
+
b1=[nil,1 ,1 ,1 ,1 ,2].to_vector(:scale)
|
141
|
+
b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
|
142
|
+
c= [nil,2, 4,2 ,2 ,2].to_vector(:scale)
|
143
|
+
ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
|
144
|
+
total=ds.vector_mean
|
145
|
+
a=ds.vector_mean(['a1','a2'],1)
|
146
|
+
b=ds.vector_mean(['b1','b2'],1)
|
147
|
+
c=ds.vector_mean(['b1','b2','c'],1)
|
148
|
+
expected_a=[5.5,6,11.5,12,12.5,30].to_vector(:scale)
|
149
|
+
expected_b=[2,1.5,1.5,1,1.5,2.5].to_vector(:scale)
|
150
|
+
expected_c=[nil, 5.0/3,7.0/3,1.5,5.0/3,7.0/3].to_vector(:scale)
|
151
|
+
expected_total=[nil,3.4,6,nil,6.0,nil].to_vector(:scale)
|
152
|
+
assert_equal(expected_a, a)
|
153
|
+
assert_equal(expected_b, b)
|
154
|
+
assert_equal(expected_c, c)
|
155
|
+
assert_equal(expected_total, total)
|
156
|
+
end
|
157
|
+
|
158
|
+
def test_each_array
|
159
|
+
expected=[[1,'Alex',20,'New York','a,b'], [2,'Claude',23,'London','b,c'], [3,'Peter',25,'London','a'],[4,'Franz', 27,'Paris',nil],[5,'George',5,'Tome','a,b,c']]
|
160
|
+
out=[]
|
161
|
+
@ds.each_array{ |a|
|
162
|
+
out.push(a)
|
163
|
+
}
|
164
|
+
assert_equal(expected,out)
|
165
|
+
end
|
166
|
+
def test_recode
|
167
|
+
@ds['age'].type=:scale
|
168
|
+
@ds.recode!("age") {|c| c['id']*2}
|
169
|
+
expected=[2,4,6,8,10].to_vector(:scale)
|
170
|
+
assert_equal(expected,@ds['age'])
|
171
|
+
end
|
172
|
+
def test_case_as
|
173
|
+
assert_equal({'id'=>1,'name'=>'Alex','city'=>'New York','age'=>20,'a1'=>'a,b'},@ds.case_as_hash(0))
|
174
|
+
assert_equal([5,'George',5,'Tome','a,b,c'],@ds.case_as_array(4))
|
175
|
+
# Native methods
|
176
|
+
assert_equal({'id'=>1,'name'=>'Alex','city'=>'New York','age'=>20,'a1'=>'a,b'},@ds._case_as_hash(0))
|
177
|
+
assert_equal([5,'George',5,'Tome','a,b,c'],@ds._case_as_array(4))
|
178
|
+
|
157
179
|
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
assert_equal(ds_exp,ds3)
|
265
|
-
assert_not_same(ds_exp,ds3)
|
266
|
-
assert_equal(ds3['v1'],ds_exp['v1'])
|
267
|
-
assert_not_same(ds3['v1'],ds_exp['v1'])
|
268
|
-
assert_equal(ds3.fields,ds_exp.fields)
|
269
|
-
assert_not_same(ds3.fields,ds_exp.fields)
|
270
|
-
|
271
|
-
|
272
|
-
# empty
|
273
|
-
ds3=ds1.dup_empty
|
274
|
-
assert_not_equal(ds1,ds3)
|
275
|
-
assert_not_equal(ds1['v1'],ds3['v1'])
|
276
|
-
assert_equal([],ds3['v1'].data)
|
277
|
-
assert_equal([],ds3['v2'].data)
|
278
|
-
assert_equal(:scale,ds3['v1'].type)
|
279
|
-
assert_equal(ds1.fields,ds2.fields)
|
280
|
-
assert_not_same(ds1.fields,ds2.fields)
|
281
|
-
end
|
282
|
-
def test_from_to
|
283
|
-
assert_equal(%w{name age city}, @ds.from_to("name","city"))
|
284
|
-
assert_raise ArgumentError do
|
285
|
-
@ds.from_to("name","a2")
|
286
|
-
end
|
180
|
+
|
181
|
+
end
|
182
|
+
def test_delete_vector
|
183
|
+
@ds.delete_vector('name')
|
184
|
+
assert_equal(%w{id age city a1},@ds.fields)
|
185
|
+
assert_equal(%w{a1 age city id},@ds.vectors.keys.sort)
|
186
|
+
end
|
187
|
+
def test_change_type
|
188
|
+
@ds.col('age').type=:scale
|
189
|
+
assert_equal(:scale,@ds.col('age').type)
|
190
|
+
end
|
191
|
+
def test_split_by_separator_recode
|
192
|
+
@ds.add_vectors_by_split_recode("a1","_")
|
193
|
+
assert_equal(%w{id name age city a1 a1_1 a1_2 a1_3},@ds.fields)
|
194
|
+
assert_equal([1,0,1,nil,1],@ds.col('a1_1').to_a)
|
195
|
+
assert_equal([1,1,0,nil,1],@ds.col('a1_2').to_a)
|
196
|
+
assert_equal([0,1,0,nil,1],@ds.col('a1_3').to_a)
|
197
|
+
assert_equal({'a1_1'=>'a1:a', 'a1_2'=>'a1:b', 'a1_3'=>'a1:c'},@ds.labels)
|
198
|
+
end
|
199
|
+
def test_split_by_separator
|
200
|
+
@ds.add_vectors_by_split("a1","_")
|
201
|
+
assert_equal(%w{id name age city a1 a1_a a1_b a1_c},@ds.fields)
|
202
|
+
assert_equal([1,0,1,nil,1],@ds.col('a1_a').to_a)
|
203
|
+
assert_equal([1,1,0,nil,1],@ds.col('a1_b').to_a)
|
204
|
+
assert_equal([0,1,0,nil,1],@ds.col('a1_c').to_a)
|
205
|
+
end
|
206
|
+
def test_percentiles
|
207
|
+
v1=(1..100).to_a.to_scale
|
208
|
+
assert_equal(50.5,v1.median)
|
209
|
+
assert_equal(25.5, v1.percentil(25))
|
210
|
+
v2=(1..99).to_a.to_scale
|
211
|
+
assert_equal(50,v2.median)
|
212
|
+
assert_equal(25,v2.percentil(25))
|
213
|
+
v3=(1..50).to_a.to_scale
|
214
|
+
assert_equal(25.5, v3.median)
|
215
|
+
assert_equal(13, v3.percentil(25))
|
216
|
+
|
217
|
+
end
|
218
|
+
def test_add_case
|
219
|
+
ds=Statsample::Dataset.new({'a'=>[].to_vector, 'b'=>[].to_vector, 'c'=>[].to_vector})
|
220
|
+
ds.add_case([1,2,3])
|
221
|
+
ds.add_case({'a'=>4,'b'=>5,'c'=>6})
|
222
|
+
ds.add_case([[7,8,9],%w{a b c}])
|
223
|
+
assert_equal({'a'=>1,'b'=>2,'c'=>3},ds.case_as_hash(0))
|
224
|
+
assert_equal([4,5,6],ds.case_as_array(1))
|
225
|
+
assert_equal([7,8,9],ds.case_as_array(2))
|
226
|
+
assert_equal(['a','b','c'],ds.case_as_array(3))
|
227
|
+
ds.add_case_array([6,7,1])
|
228
|
+
ds.update_valid_data
|
229
|
+
assert_equal([6,7,1],ds.case_as_array(4))
|
230
|
+
|
231
|
+
end
|
232
|
+
def test_marshaling
|
233
|
+
ds_marshal=Marshal.load(Marshal.dump(@ds))
|
234
|
+
assert_equal(ds_marshal,@ds)
|
235
|
+
end
|
236
|
+
def test_range
|
237
|
+
v1=[1,2,3,4].to_vector
|
238
|
+
v2=[5,6,7,8].to_vector
|
239
|
+
v3=[9,10,11,12].to_vector
|
240
|
+
ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2,'v3'=>v3}, %w{v3 v2 v1})
|
241
|
+
assert_same(v1,ds1['v1'])
|
242
|
+
ds2=ds1["v2".."v1"]
|
243
|
+
assert_equal(%w{v2 v1},ds2.fields)
|
244
|
+
assert_same(ds1['v1'],ds2['v1'])
|
245
|
+
assert_same(ds1['v2'],ds2['v2'])
|
246
|
+
|
247
|
+
|
248
|
+
end
|
249
|
+
def test_dup
|
250
|
+
v1=[1,2,3,4].to_vector
|
251
|
+
v2=[5,6,7,8].to_vector
|
252
|
+
ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2}, %w{v2 v1})
|
253
|
+
ds2=ds1.dup
|
254
|
+
assert_equal(ds1,ds2)
|
255
|
+
assert_not_same(ds1,ds2)
|
256
|
+
assert_equal(ds1['v1'],ds2['v1'])
|
257
|
+
assert_not_same(ds1['v1'],ds2['v1'])
|
258
|
+
assert_equal(ds1.fields,ds2.fields)
|
259
|
+
assert_not_same(ds1.fields,ds2.fields)
|
260
|
+
ds1['v1'].type=:scale
|
261
|
+
# dup partial
|
262
|
+
ds3=ds1.dup('v1')
|
263
|
+
ds_exp=Statsample::Dataset.new({'v1'=>v1},%w{v1})
|
264
|
+
assert_equal(ds_exp,ds3)
|
265
|
+
assert_not_same(ds_exp,ds3)
|
266
|
+
assert_equal(ds3['v1'],ds_exp['v1'])
|
267
|
+
assert_not_same(ds3['v1'],ds_exp['v1'])
|
268
|
+
assert_equal(ds3.fields,ds_exp.fields)
|
269
|
+
assert_not_same(ds3.fields,ds_exp.fields)
|
270
|
+
|
271
|
+
|
272
|
+
# empty
|
273
|
+
ds3=ds1.dup_empty
|
274
|
+
assert_not_equal(ds1,ds3)
|
275
|
+
assert_not_equal(ds1['v1'],ds3['v1'])
|
276
|
+
assert_equal([],ds3['v1'].data)
|
277
|
+
assert_equal([],ds3['v2'].data)
|
278
|
+
assert_equal(:scale,ds3['v1'].type)
|
279
|
+
assert_equal(ds1.fields,ds2.fields)
|
280
|
+
assert_not_same(ds1.fields,ds2.fields)
|
281
|
+
end
|
282
|
+
def test_from_to
|
283
|
+
assert_equal(%w{name age city}, @ds.from_to("name","city"))
|
284
|
+
assert_raise ArgumentError do
|
285
|
+
@ds.from_to("name","a2")
|
287
286
|
end
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
ds2=ds1.dup_empty
|
294
|
-
ds1.each_array_with_nils {|row|
|
295
|
-
ds2.add_case_array(row)
|
296
|
-
}
|
297
|
-
ds2.update_valid_data
|
298
|
-
assert_equal([1,nil,3,4,nil],ds2['v1'].data)
|
299
|
-
assert_equal([5,6,nil,8,20],ds2['v2'].data)
|
300
|
-
end
|
301
|
-
def test_dup_only_valid
|
302
|
-
v1=[1,nil,3,4].to_vector(:scale)
|
303
|
-
v2=[5,6,nil,8].to_vector(:scale)
|
304
|
-
v3=[9,10,11,12].to_vector(:scale)
|
287
|
+
end
|
288
|
+
def test_each_array_with_nils
|
289
|
+
v1=[1,-99,3,4,"na"].to_vector(:scale,:missing_values=>[-99,"na"])
|
290
|
+
v2=[5,6,-99,8,20].to_vector(:scale,:missing_values=>[-99])
|
291
|
+
v3=[9,10,11,12,20].to_vector(:scale,:missing_values=>[-99])
|
305
292
|
ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2,'v3'=>v3})
|
306
|
-
ds2=ds1.
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
filtered=@ds.filter{|c| c['id']==2 or c['id']==4}
|
314
|
-
expected=Statsample::Dataset.new({'id' => Statsample::Vector.new([2,4]), 'name'=>Statsample::Vector.new(%w{Claude Franz}), 'age'=>Statsample::Vector.new([23,27],:scale),
|
315
|
-
'city'=>Statsample::Vector.new(['London','Paris']),
|
316
|
-
'a1'=>Statsample::Vector.new(['b,c',nil,])}, ['id','name','age','city','a1'])
|
317
|
-
assert_equal(expected,filtered)
|
318
|
-
end
|
319
|
-
def test_filter_field
|
320
|
-
@ds['age'].type=:scale
|
321
|
-
filtered=@ds.filter_field('id') {|c| c['id']==2 or c['id']==4}
|
322
|
-
expected=[2,4].to_vector
|
323
|
-
assert_equal(expected,filtered)
|
324
|
-
|
325
|
-
end
|
326
|
-
def test_verify
|
327
|
-
name=%w{r1 r2 r3 r4}.to_vector(:nominal)
|
328
|
-
v1=[1,2,3,4].to_vector(:scale)
|
329
|
-
v2=[4,3,2,1].to_vector(:scale)
|
330
|
-
v3=[10,20,30,40].to_vector(:scale)
|
331
|
-
v4=%w{a b a b}.to_vector(:nominal)
|
332
|
-
ds={'v1'=>v1,'v2'=>v2,'v3'=>v3,'v4'=>v4,'id'=>name}.to_dataset
|
333
|
-
ds.fields=%w{v1 v2 v3 v4 id}
|
334
|
-
#Correct
|
335
|
-
t1=create_test("If v4=a, v1 odd") {|r| r['v4']=='b' or (r['v4']=='a' and r['v1']%2==1)}
|
336
|
-
t2=create_test("v3=v1*10") {|r| r['v3']==r['v1']*10}
|
337
|
-
# Fail!
|
338
|
-
t3=create_test("v4='b'") {|r| r['v4']=='b'}
|
339
|
-
exp1=["1 [1]: v4='b'", "3 [3]: v4='b'"]
|
340
|
-
exp2=["1 [r1]: v4='b'", "3 [r3]: v4='b'"]
|
341
|
-
res=ds.verify(t3,t1,t2)
|
342
|
-
assert_equal(exp1,res)
|
343
|
-
res=ds.verify('id',t1,t2,t3)
|
344
|
-
assert_equal(exp2,res)
|
345
|
-
end
|
346
|
-
def test_compute_operation
|
347
|
-
v1=[1,2,3,4].to_vector(:scale)
|
348
|
-
v2=[4,3,2,1].to_vector(:scale)
|
349
|
-
v3=[10,20,30,40].to_vector(:scale)
|
350
|
-
vscale=[1.quo(2),1,3.quo(2),2].to_vector(:scale)
|
351
|
-
vsum=[1+4+10.0,2+3+20.0,3+2+30.0,4+1+40.0].to_vector(:scale)
|
352
|
-
vmult=[1*4,2*3,3*2,4*1].to_vector(:scale)
|
353
|
-
ds={'v1'=>v1,'v2'=>v2,'v3'=>v3}.to_dataset
|
354
|
-
assert_equal(vscale,ds.compute("v1/2"))
|
355
|
-
assert_equal(vsum,ds.compute("v1+v2+v3"))
|
356
|
-
assert_equal(vmult,ds.compute("v1*v2"))
|
357
|
-
|
358
|
-
end
|
359
|
-
def test_crosstab_with_asignation
|
360
|
-
v1=%w{a a a b b b c c c}.to_vector
|
361
|
-
v2=%w{a b c a b c a b c}.to_vector
|
362
|
-
v3=%w{0 1 0 0 1 1 0 0 1}.to_scale
|
363
|
-
ds=Statsample::Dataset.crosstab_by_asignation(v1,v2,v3)
|
364
|
-
assert_equal(:nominal, ds['_id'].type)
|
365
|
-
assert_equal(:scale, ds['a'].type)
|
366
|
-
assert_equal(:scale, ds['b'].type)
|
367
|
-
ev_id=%w{a b c}.to_vector
|
368
|
-
ev_a =%w{0 0 0}.to_scale
|
369
|
-
ev_b =%w{1 1 0}.to_scale
|
370
|
-
ev_c =%w{0 1 1}.to_scale
|
371
|
-
ds2={'_id'=>ev_id, 'a'=>ev_a, 'b'=>ev_b, 'c'=>ev_c}.to_dataset
|
372
|
-
assert_equal(ds, ds2)
|
373
|
-
end
|
374
|
-
def test_one_to_many
|
375
|
-
cases=[
|
376
|
-
['1','george','red',10,'blue',20,nil,nil],
|
377
|
-
['2','fred','green',15,'orange',30,'white',20],
|
378
|
-
['3','alfred',nil,nil,nil,nil,nil,nil]
|
379
|
-
]
|
380
|
-
ds=Statsample::Dataset.new(%w{id name car_color1 car_value1 car_color2 car_value2 car_color3 car_value3})
|
381
|
-
cases.each {|c| ds.add_case_array c }
|
382
|
-
ds.update_valid_data
|
383
|
-
ids=%w{1 1 2 2 2}.to_vector
|
384
|
-
colors=%w{red blue green orange white}.to_vector
|
385
|
-
values=[10,20,15,30,20].to_vector
|
386
|
-
col_ids=[1,2,1,2,3].to_scale
|
387
|
-
ds_expected={'id'=>ids, '_col_id'=>col_ids, 'color'=>colors, 'value'=>values}.to_dataset(['id','_col_id', 'color','value'])
|
388
|
-
assert_equal(ds_expected, ds.one_to_many(%w{id}, "car_%v%n"))
|
389
|
-
|
293
|
+
ds2=ds1.dup_empty
|
294
|
+
ds1.each_array_with_nils {|row|
|
295
|
+
ds2.add_case_array(row)
|
296
|
+
}
|
297
|
+
ds2.update_valid_data
|
298
|
+
assert_equal([1,nil,3,4,nil],ds2['v1'].data)
|
299
|
+
assert_equal([5,6,nil,8,20],ds2['v2'].data)
|
390
300
|
end
|
301
|
+
def test_dup_only_valid
|
302
|
+
v1=[1,nil,3,4].to_vector(:scale)
|
303
|
+
v2=[5,6,nil,8].to_vector(:scale)
|
304
|
+
v3=[9,10,11,12].to_vector(:scale)
|
305
|
+
ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2,'v3'=>v3})
|
306
|
+
ds2=ds1.dup_only_valid
|
307
|
+
expected=Statsample::Dataset.new({'v1'=>[1,4].to_vector(:scale), 'v2'=> [5,8].to_vector(:scale), 'v3'=>[9, 12].to_vector(:scale)})
|
308
|
+
assert_equal(expected,ds2)
|
309
|
+
assert_equal(expected.vectors.values,Statsample::only_valid(v1,v2,v3))
|
310
|
+
end
|
311
|
+
def test_filter
|
312
|
+
@ds['age'].type=:scale
|
313
|
+
filtered=@ds.filter{|c| c['id']==2 or c['id']==4}
|
314
|
+
expected=Statsample::Dataset.new({'id' => Statsample::Vector.new([2,4]), 'name'=>Statsample::Vector.new(%w{Claude Franz}), 'age'=>Statsample::Vector.new([23,27],:scale),
|
315
|
+
'city'=>Statsample::Vector.new(['London','Paris']),
|
316
|
+
'a1'=>Statsample::Vector.new(['b,c',nil,])}, ['id','name','age','city','a1'])
|
317
|
+
assert_equal(expected,filtered)
|
318
|
+
end
|
319
|
+
def test_filter_field
|
320
|
+
@ds['age'].type=:scale
|
321
|
+
filtered=@ds.filter_field('id') {|c| c['id']==2 or c['id']==4}
|
322
|
+
expected=[2,4].to_vector
|
323
|
+
assert_equal(expected,filtered)
|
324
|
+
|
325
|
+
end
|
326
|
+
def test_verify
|
327
|
+
name=%w{r1 r2 r3 r4}.to_vector(:nominal)
|
328
|
+
v1=[1,2,3,4].to_vector(:scale)
|
329
|
+
v2=[4,3,2,1].to_vector(:scale)
|
330
|
+
v3=[10,20,30,40].to_vector(:scale)
|
331
|
+
v4=%w{a b a b}.to_vector(:nominal)
|
332
|
+
ds={'v1'=>v1,'v2'=>v2,'v3'=>v3,'v4'=>v4,'id'=>name}.to_dataset
|
333
|
+
ds.fields=%w{v1 v2 v3 v4 id}
|
334
|
+
#Correct
|
335
|
+
t1=create_test("If v4=a, v1 odd") {|r| r['v4']=='b' or (r['v4']=='a' and r['v1']%2==1)}
|
336
|
+
t2=create_test("v3=v1*10") {|r| r['v3']==r['v1']*10}
|
337
|
+
# Fail!
|
338
|
+
t3=create_test("v4='b'") {|r| r['v4']=='b'}
|
339
|
+
exp1=["1 [1]: v4='b'", "3 [3]: v4='b'"]
|
340
|
+
exp2=["1 [r1]: v4='b'", "3 [r3]: v4='b'"]
|
341
|
+
res=ds.verify(t3,t1,t2)
|
342
|
+
assert_equal(exp1,res)
|
343
|
+
res=ds.verify('id',t1,t2,t3)
|
344
|
+
assert_equal(exp2,res)
|
345
|
+
end
|
346
|
+
def test_compute_operation
|
347
|
+
v1=[1,2,3,4].to_vector(:scale)
|
348
|
+
v2=[4,3,2,1].to_vector(:scale)
|
349
|
+
v3=[10,20,30,40].to_vector(:scale)
|
350
|
+
vscale=[1.quo(2),1,3.quo(2),2].to_vector(:scale)
|
351
|
+
vsum=[1+4+10.0,2+3+20.0,3+2+30.0,4+1+40.0].to_vector(:scale)
|
352
|
+
vmult=[1*4,2*3,3*2,4*1].to_vector(:scale)
|
353
|
+
ds={'v1'=>v1,'v2'=>v2,'v3'=>v3}.to_dataset
|
354
|
+
assert_equal(vscale,ds.compute("v1/2"))
|
355
|
+
assert_equal(vsum,ds.compute("v1+v2+v3"))
|
356
|
+
assert_equal(vmult,ds.compute("v1*v2"))
|
357
|
+
|
358
|
+
end
|
359
|
+
def test_crosstab_with_asignation
|
360
|
+
v1=%w{a a a b b b c c c}.to_vector
|
361
|
+
v2=%w{a b c a b c a b c}.to_vector
|
362
|
+
v3=%w{0 1 0 0 1 1 0 0 1}.to_scale
|
363
|
+
ds=Statsample::Dataset.crosstab_by_asignation(v1,v2,v3)
|
364
|
+
assert_equal(:nominal, ds['_id'].type)
|
365
|
+
assert_equal(:scale, ds['a'].type)
|
366
|
+
assert_equal(:scale, ds['b'].type)
|
367
|
+
ev_id=%w{a b c}.to_vector
|
368
|
+
ev_a =%w{0 0 0}.to_scale
|
369
|
+
ev_b =%w{1 1 0}.to_scale
|
370
|
+
ev_c =%w{0 1 1}.to_scale
|
371
|
+
ds2={'_id'=>ev_id, 'a'=>ev_a, 'b'=>ev_b, 'c'=>ev_c}.to_dataset
|
372
|
+
assert_equal(ds, ds2)
|
373
|
+
end
|
374
|
+
def test_one_to_many
|
375
|
+
cases=[
|
376
|
+
['1','george','red',10,'blue',20,nil,nil],
|
377
|
+
['2','fred','green',15,'orange',30,'white',20],
|
378
|
+
['3','alfred',nil,nil,nil,nil,nil,nil]
|
379
|
+
]
|
380
|
+
ds=Statsample::Dataset.new(%w{id name car_color1 car_value1 car_color2 car_value2 car_color3 car_value3})
|
381
|
+
cases.each {|c| ds.add_case_array c }
|
382
|
+
ds.update_valid_data
|
383
|
+
ids=%w{1 1 2 2 2}.to_vector
|
384
|
+
colors=%w{red blue green orange white}.to_vector
|
385
|
+
values=[10,20,15,30,20].to_vector
|
386
|
+
col_ids=[1,2,1,2,3].to_scale
|
387
|
+
ds_expected={'id'=>ids, '_col_id'=>col_ids, 'color'=>colors, 'value'=>values}.to_dataset(['id','_col_id', 'color','value'])
|
388
|
+
assert_equal(ds_expected, ds.one_to_many(%w{id}, "car_%v%n"))
|
389
|
+
|
390
|
+
end
|
391
391
|
|
392
392
|
end
|