statsample 0.6.3 → 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -59,6 +59,7 @@ class StatsampleBivariateTestCase < Test::Unit::TestCase
59
59
  poly.method=:joint
60
60
  poly.compute
61
61
 
62
+
62
63
  assert_in_delta(0.4192, poly.r, 0.0001)
63
64
  assert_in_delta(-0.2421, poly.threshold_y[0],0.0001)
64
65
  assert_in_delta(-0.0297, poly.threshold_x[0],0.0001)
@@ -49,7 +49,4 @@ class StatsampleCrosstabTestCase < Test::Unit::TestCase
49
49
  ct=Statsample::Crosstab.new(v1,v2)
50
50
  assert_equal(Matrix[[2.5,2.5],[2.5,2.5]],ct.matrix_expected)
51
51
  end
52
- def test_to_s
53
- assert_match(/man\s+|\s+woman/,@ct.to_s)
54
- end
55
52
  end
data/test/test_dataset.rb CHANGED
@@ -3,390 +3,390 @@ require 'statsample'
3
3
  require 'test/unit'
4
4
  require 'tmpdir'
5
5
  class StatsampleDatasetTestCase < Test::Unit::TestCase
6
- def setup
7
- @ds=Statsample::Dataset.new({'id' => Statsample::Vector.new([1,2,3,4,5]), 'name'=>Statsample::Vector.new(%w{Alex Claude Peter Franz George}), 'age'=>Statsample::Vector.new([20,23,25,27,5]),
8
- 'city'=>Statsample::Vector.new(['New York','London','London','Paris','Tome']),
9
- 'a1'=>Statsample::Vector.new(['a,b','b,c','a',nil,'a,b,c'])}, ['id','name','age','city','a1'])
10
- end
11
- def test_basic
12
- assert_equal(5,@ds.cases)
13
- assert_equal(%w{id name age city a1}, @ds.fields)
14
- end
15
- def test_saveload
16
- outfile=Dir::tmpdir+"/dataset.ds"
17
- @ds.save(outfile)
18
- a=Statsample.load(outfile)
19
- assert_equal(@ds,a)
20
- end
21
-
22
- def test_matrix
23
- matrix=Matrix[[1,2],[3,4],[5,6]]
24
- ds=Statsample::Dataset.new('v1'=>[1,3,5].to_vector,'v2'=>[2,4,6].to_vector)
25
- assert_equal(matrix,ds.to_matrix)
6
+ def setup
7
+ @ds=Statsample::Dataset.new({'id' => Statsample::Vector.new([1,2,3,4,5]), 'name'=>Statsample::Vector.new(%w{Alex Claude Peter Franz George}), 'age'=>Statsample::Vector.new([20,23,25,27,5]),
8
+ 'city'=>Statsample::Vector.new(['New York','London','London','Paris','Tome']),
9
+ 'a1'=>Statsample::Vector.new(['a,b','b,c','a',nil,'a,b,c'])}, ['id','name','age','city','a1'])
10
+ end
11
+ def test_basic
12
+ assert_equal(5,@ds.cases)
13
+ assert_equal(%w{id name age city a1}, @ds.fields)
14
+ end
15
+ def test_saveload
16
+ outfile=Dir::tmpdir+"/dataset.ds"
17
+ @ds.save(outfile)
18
+ a=Statsample.load(outfile)
19
+ assert_equal(@ds,a)
20
+ end
21
+
22
+ def test_matrix
23
+ matrix=Matrix[[1,2],[3,4],[5,6]]
24
+ ds=Statsample::Dataset.new('v1'=>[1,3,5].to_vector,'v2'=>[2,4,6].to_vector)
25
+ assert_equal(matrix,ds.to_matrix)
26
+ end
27
+
28
+ def test_fields
29
+ @ds.fields=%w{name a1 id age city}
30
+ assert_equal(%w{name a1 id age city}, @ds.fields)
31
+ @ds.fields=%w{id name age}
32
+ assert_equal(%w{id name age a1 city}, @ds.fields)
33
+ end
34
+ def test_merge
35
+ a=[1,2,3].to_scale
36
+ b=[3,4,5].to_vector
37
+ c=[4,5,6].to_scale
38
+ d=[7,8,9].to_vector
39
+ e=[10,20,30].to_vector
40
+ ds1={'a'=>a,'b'=>b}.to_dataset
41
+ ds2={'c'=>c,'d'=>d}.to_dataset
42
+ exp={'a'=>a,'b'=>b,'c'=>c,'d'=>d}.to_dataset
43
+
44
+ assert_equal(exp,ds1.merge(ds2))
45
+ exp.fields=%w{c d a b}
46
+ assert_equal(exp,ds2.merge(ds1))
47
+ ds3={'a'=>e}.to_dataset
48
+ exp={'a_1'=>a,'b'=>b,'a_2'=>e}.to_dataset
49
+ exp.fields=%w{a_1 b a_2}
50
+ assert_equal(exp,ds1.merge(ds3))
51
+ end
52
+ def test_each_vector
53
+ a=[1,2,3].to_vector
54
+ b=[3,4,5].to_vector
55
+ fields=["a","b"]
56
+ ds=Statsample::Dataset.new({'a'=>a,'b'=>b},fields)
57
+ res=[]
58
+ ds.each_vector{|k,v|
59
+ res.push([k,v])
60
+ }
61
+ assert_equal([["a",a],["b",b]],res)
62
+ ds.fields=["b","a"]
63
+ res=[]
64
+ ds.each_vector{|k,v|
65
+ res.push([k,v])
66
+ }
67
+ assert_equal([["b",b],["a",a]],res)
68
+ end
69
+ def test_equality
70
+ v1=[1,2,3,4].to_vector
71
+ v2=[5,6,7,8].to_vector
72
+ ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2}, %w{v2 v1})
73
+ v3=[1,2,3,4].to_vector
74
+ v4=[5,6,7,8].to_vector
75
+ ds2=Statsample::Dataset.new({'v1'=>v3,'v2'=>v4}, %w{v2 v1})
76
+ assert_equal(ds1,ds2)
77
+ ds2.fields=%w{v1 v2}
78
+ assert_not_equal(ds1,ds2)
79
+ end
80
+ def test_add_vector
81
+ v=Statsample::Vector.new(%w{a b c d e})
82
+ @ds.add_vector('new',v)
83
+ assert_equal(%w{id name age city a1 new},@ds.fields)
84
+ x=Statsample::Vector.new(%w{a b c d e f g})
85
+ assert_raise ArgumentError do
86
+ @ds.add_vector('new2',x)
26
87
  end
88
+ end
89
+ def test_vector_by_calculation
90
+ a1=[1,2,3,4,5,6,7].to_vector(:scale)
91
+ a2=[10,20,30,40,50,60,70].to_vector(:scale)
92
+ a3=[100,200,300,400,500,600,700].to_vector(:scale)
93
+ ds={'a1'=>a1,'a2'=>a2,'a3'=>a3}.to_dataset
94
+ total=ds.vector_by_calculation() {|row|
95
+ row['a1']+row['a2']+row['a3']
96
+ }
97
+ expected=[111,222,333,444,555,666,777].to_vector(:scale)
98
+ assert_equal(expected,total)
99
+ end
100
+ def test_vector_sum
101
+ a1=[1 ,2 ,3 ,4 , 5,nil].to_vector(:scale)
102
+ a2=[10 ,10,20,20 ,20,30].to_vector(:scale)
103
+ b1=[nil,1 ,1 ,1 ,1 ,2].to_vector(:scale)
104
+ b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
105
+ ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2}.to_dataset
106
+ total=ds.vector_sum
107
+ a=ds.vector_sum(['a1','a2'])
108
+ b=ds.vector_sum(['b1','b2'])
109
+ expected_a=[11,12,23,24,25,nil].to_vector(:scale)
110
+ expected_b=[nil,3,3,nil,3,5].to_vector(:scale)
111
+ expected_total=[nil,15,26,nil,28,nil].to_vector(:scale)
112
+ assert_equal(expected_a, a)
113
+ assert_equal(expected_b, b)
114
+ assert_equal(expected_total, total)
115
+ end
116
+ def test_vector_missing_values
117
+ a1=[1 ,nil ,3 ,4 , 5,nil].to_vector(:scale)
118
+ a2=[10 ,nil ,20,20 ,20,30].to_vector(:scale)
119
+ b1=[nil,nil ,1 ,1 ,1 ,2].to_vector(:scale)
120
+ b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
121
+ c= [nil,2 , 4,2 ,2 ,2].to_vector(:scale)
122
+ ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
123
+ mva=[2,3,0,1,0,1].to_vector(:scale)
124
+ assert_equal(mva,ds.vector_missing_values)
125
+ end
126
+ def test_vector_count_characters
127
+ a1=[1 ,"abcde" ,3 ,4 , 5,nil].to_vector(:scale)
128
+ a2=[10 ,20.3 ,20 ,20 ,20,30].to_vector(:scale)
129
+ b1=[nil,"343434" ,1 ,1 ,1 ,2].to_vector(:scale)
130
+ b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
131
+ c= [nil,2 ,"This is a nice example",2 ,2 ,2].to_vector(:scale)
132
+ ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
133
+ exp=[4,17,27,5,6,5].to_vector(:scale)
134
+ assert_equal(exp,ds.vector_count_characters)
27
135
 
28
- def test_fields
29
- @ds.fields=%w{name a1 id age city}
30
- assert_equal(%w{name a1 id age city}, @ds.fields)
31
- @ds.fields=%w{id name age}
32
- assert_equal(%w{id name age a1 city}, @ds.fields)
33
- end
34
- def test_merge
35
- a=[1,2,3].to_scale
36
- b=[3,4,5].to_vector
37
- c=[4,5,6].to_scale
38
- d=[7,8,9].to_vector
39
- e=[10,20,30].to_vector
40
- ds1={'a'=>a,'b'=>b}.to_dataset
41
- ds2={'c'=>c,'d'=>d}.to_dataset
42
- exp={'a'=>a,'b'=>b,'c'=>c,'d'=>d}.to_dataset
43
-
44
- assert_equal(exp,ds1.merge(ds2))
45
- exp.fields=%w{c d a b}
46
- assert_equal(exp,ds2.merge(ds1))
47
- ds3={'a'=>e}.to_dataset
48
- exp={'a_1'=>a,'b'=>b,'a_2'=>e}.to_dataset
49
- exp.fields=%w{a_1 b a_2}
50
- assert_equal(exp,ds1.merge(ds3))
51
- end
52
- def test_each_vector
53
- a=[1,2,3].to_vector
54
- b=[3,4,5].to_vector
55
- fields=["a","b"]
56
- ds=Statsample::Dataset.new({'a'=>a,'b'=>b},fields)
57
- res=[]
58
- ds.each_vector{|k,v|
59
- res.push([k,v])
60
- }
61
- assert_equal([["a",a],["b",b]],res)
62
- ds.fields=["b","a"]
63
- res=[]
64
- ds.each_vector{|k,v|
65
- res.push([k,v])
66
- }
67
- assert_equal([["b",b],["a",a]],res)
68
- end
69
- def test_equality
70
- v1=[1,2,3,4].to_vector
71
- v2=[5,6,7,8].to_vector
72
- ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2}, %w{v2 v1})
73
- v3=[1,2,3,4].to_vector
74
- v4=[5,6,7,8].to_vector
75
- ds2=Statsample::Dataset.new({'v1'=>v3,'v2'=>v4}, %w{v2 v1})
76
- assert_equal(ds1,ds2)
77
- ds2.fields=%w{v1 v2}
78
- assert_not_equal(ds1,ds2)
79
- end
80
- def test_add_vector
81
- v=Statsample::Vector.new(%w{a b c d e})
82
- @ds.add_vector('new',v)
83
- assert_equal(%w{id name age city a1 new},@ds.fields)
84
- x=Statsample::Vector.new(%w{a b c d e f g})
85
- assert_raise ArgumentError do
86
- @ds.add_vector('new2',x)
87
- end
88
- end
89
- def test_vector_by_calculation
90
- a1=[1,2,3,4,5,6,7].to_vector(:scale)
91
- a2=[10,20,30,40,50,60,70].to_vector(:scale)
92
- a3=[100,200,300,400,500,600,700].to_vector(:scale)
93
- ds={'a1'=>a1,'a2'=>a2,'a3'=>a3}.to_dataset
94
- total=ds.vector_by_calculation() {|row|
95
- row['a1']+row['a2']+row['a3']
96
- }
97
- expected=[111,222,333,444,555,666,777].to_vector(:scale)
98
- assert_equal(expected,total)
99
- end
100
- def test_vector_sum
101
- a1=[1 ,2 ,3 ,4 , 5,nil].to_vector(:scale)
102
- a2=[10 ,10,20,20 ,20,30].to_vector(:scale)
103
- b1=[nil,1 ,1 ,1 ,1 ,2].to_vector(:scale)
104
- b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
105
- ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2}.to_dataset
106
- total=ds.vector_sum
107
- a=ds.vector_sum(['a1','a2'])
108
- b=ds.vector_sum(['b1','b2'])
109
- expected_a=[11,12,23,24,25,nil].to_vector(:scale)
110
- expected_b=[nil,3,3,nil,3,5].to_vector(:scale)
111
- expected_total=[nil,15,26,nil,28,nil].to_vector(:scale)
112
- assert_equal(expected_a, a)
113
- assert_equal(expected_b, b)
114
- assert_equal(expected_total, total)
115
- end
116
- def test_vector_missing_values
117
- a1=[1 ,nil ,3 ,4 , 5,nil].to_vector(:scale)
118
- a2=[10 ,nil ,20,20 ,20,30].to_vector(:scale)
119
- b1=[nil,nil ,1 ,1 ,1 ,2].to_vector(:scale)
120
- b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
121
- c= [nil,2 , 4,2 ,2 ,2].to_vector(:scale)
122
- ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
123
- mva=[2,3,0,1,0,1].to_vector(:scale)
124
- assert_equal(mva,ds.vector_missing_values)
125
- end
126
- def test_vector_count_characters
127
- a1=[1 ,"abcde" ,3 ,4 , 5,nil].to_vector(:scale)
128
- a2=[10 ,20.3 ,20 ,20 ,20,30].to_vector(:scale)
129
- b1=[nil,"343434" ,1 ,1 ,1 ,2].to_vector(:scale)
130
- b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
131
- c= [nil,2 ,"This is a nice example",2 ,2 ,2].to_vector(:scale)
132
- ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
133
- exp=[4,17,27,5,6,5].to_vector(:scale)
134
- assert_equal(exp,ds.vector_count_characters)
135
-
136
- end
137
- def test_vector_mean
138
- a1=[1 ,2 ,3 ,4 , 5,nil].to_vector(:scale)
139
- a2=[10 ,10,20,20 ,20,30].to_vector(:scale)
140
- b1=[nil,1 ,1 ,1 ,1 ,2].to_vector(:scale)
141
- b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
142
- c= [nil,2, 4,2 ,2 ,2].to_vector(:scale)
143
- ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
144
- total=ds.vector_mean
145
- a=ds.vector_mean(['a1','a2'],1)
146
- b=ds.vector_mean(['b1','b2'],1)
147
- c=ds.vector_mean(['b1','b2','c'],1)
148
- expected_a=[5.5,6,11.5,12,12.5,30].to_vector(:scale)
149
- expected_b=[2,1.5,1.5,1,1.5,2.5].to_vector(:scale)
150
- expected_c=[nil, 5.0/3,7.0/3,1.5,5.0/3,7.0/3].to_vector(:scale)
151
- expected_total=[nil,3.4,6,nil,6.0,nil].to_vector(:scale)
152
- assert_equal(expected_a, a)
153
- assert_equal(expected_b, b)
154
- assert_equal(expected_c, c)
155
- assert_equal(expected_total, total)
156
- end
136
+ end
137
+ def test_vector_mean
138
+ a1=[1 ,2 ,3 ,4 , 5,nil].to_vector(:scale)
139
+ a2=[10 ,10,20,20 ,20,30].to_vector(:scale)
140
+ b1=[nil,1 ,1 ,1 ,1 ,2].to_vector(:scale)
141
+ b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
142
+ c= [nil,2, 4,2 ,2 ,2].to_vector(:scale)
143
+ ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
144
+ total=ds.vector_mean
145
+ a=ds.vector_mean(['a1','a2'],1)
146
+ b=ds.vector_mean(['b1','b2'],1)
147
+ c=ds.vector_mean(['b1','b2','c'],1)
148
+ expected_a=[5.5,6,11.5,12,12.5,30].to_vector(:scale)
149
+ expected_b=[2,1.5,1.5,1,1.5,2.5].to_vector(:scale)
150
+ expected_c=[nil, 5.0/3,7.0/3,1.5,5.0/3,7.0/3].to_vector(:scale)
151
+ expected_total=[nil,3.4,6,nil,6.0,nil].to_vector(:scale)
152
+ assert_equal(expected_a, a)
153
+ assert_equal(expected_b, b)
154
+ assert_equal(expected_c, c)
155
+ assert_equal(expected_total, total)
156
+ end
157
+
158
+ def test_each_array
159
+ expected=[[1,'Alex',20,'New York','a,b'], [2,'Claude',23,'London','b,c'], [3,'Peter',25,'London','a'],[4,'Franz', 27,'Paris',nil],[5,'George',5,'Tome','a,b,c']]
160
+ out=[]
161
+ @ds.each_array{ |a|
162
+ out.push(a)
163
+ }
164
+ assert_equal(expected,out)
165
+ end
166
+ def test_recode
167
+ @ds['age'].type=:scale
168
+ @ds.recode!("age") {|c| c['id']*2}
169
+ expected=[2,4,6,8,10].to_vector(:scale)
170
+ assert_equal(expected,@ds['age'])
171
+ end
172
+ def test_case_as
173
+ assert_equal({'id'=>1,'name'=>'Alex','city'=>'New York','age'=>20,'a1'=>'a,b'},@ds.case_as_hash(0))
174
+ assert_equal([5,'George',5,'Tome','a,b,c'],@ds.case_as_array(4))
175
+ # Native methods
176
+ assert_equal({'id'=>1,'name'=>'Alex','city'=>'New York','age'=>20,'a1'=>'a,b'},@ds._case_as_hash(0))
177
+ assert_equal([5,'George',5,'Tome','a,b,c'],@ds._case_as_array(4))
178
+
157
179
 
158
- def test_each_array
159
- expected=[[1,'Alex',20,'New York','a,b'], [2,'Claude',23,'London','b,c'], [3,'Peter',25,'London','a'],[4,'Franz', 27,'Paris',nil],[5,'George',5,'Tome','a,b,c']]
160
- out=[]
161
- @ds.each_array{ |a|
162
- out.push(a)
163
- }
164
- assert_equal(expected,out)
165
- end
166
- def test_recode
167
- @ds['age'].type=:scale
168
- @ds.recode!("age") {|c| c['id']*2}
169
- expected=[2,4,6,8,10].to_vector(:scale)
170
- assert_equal(expected,@ds['age'])
171
- end
172
- def test_case_as
173
- assert_equal({'id'=>1,'name'=>'Alex','city'=>'New York','age'=>20,'a1'=>'a,b'},@ds.case_as_hash(0))
174
- assert_equal([5,'George',5,'Tome','a,b,c'],@ds.case_as_array(4))
175
- # Native methods
176
- assert_equal({'id'=>1,'name'=>'Alex','city'=>'New York','age'=>20,'a1'=>'a,b'},@ds._case_as_hash(0))
177
- assert_equal([5,'George',5,'Tome','a,b,c'],@ds._case_as_array(4))
178
-
179
-
180
-
181
- end
182
- def test_delete_vector
183
- @ds.delete_vector('name')
184
- assert_equal(%w{id age city a1},@ds.fields)
185
- assert_equal(%w{a1 age city id},@ds.vectors.keys.sort)
186
- end
187
- def test_change_type
188
- @ds.col('age').type=:scale
189
- assert_equal(:scale,@ds.col('age').type)
190
- end
191
- def test_split_by_separator_recode
192
- @ds.add_vectors_by_split_recode("a1","_")
193
- assert_equal(%w{id name age city a1 a1_1 a1_2 a1_3},@ds.fields)
194
- assert_equal([1,0,1,nil,1],@ds.col('a1_1').to_a)
195
- assert_equal([1,1,0,nil,1],@ds.col('a1_2').to_a)
196
- assert_equal([0,1,0,nil,1],@ds.col('a1_3').to_a)
197
- assert_equal({'a1_1'=>'a1:a', 'a1_2'=>'a1:b', 'a1_3'=>'a1:c'},@ds.labels)
198
- end
199
- def test_split_by_separator
200
- @ds.add_vectors_by_split("a1","_")
201
- assert_equal(%w{id name age city a1 a1_a a1_b a1_c},@ds.fields)
202
- assert_equal([1,0,1,nil,1],@ds.col('a1_a').to_a)
203
- assert_equal([1,1,0,nil,1],@ds.col('a1_b').to_a)
204
- assert_equal([0,1,0,nil,1],@ds.col('a1_c').to_a)
205
- end
206
- def test_percentiles
207
- v1=(1..100).to_a.to_scale
208
- assert_equal(50.5,v1.median)
209
- assert_equal(25.5, v1.percentil(25))
210
- v2=(1..99).to_a.to_scale
211
- assert_equal(50,v2.median)
212
- assert_equal(25,v2.percentil(25))
213
- v3=(1..50).to_a.to_scale
214
- assert_equal(25.5, v3.median)
215
- assert_equal(13, v3.percentil(25))
216
-
217
- end
218
- def test_add_case
219
- ds=Statsample::Dataset.new({'a'=>[].to_vector, 'b'=>[].to_vector, 'c'=>[].to_vector})
220
- ds.add_case([1,2,3])
221
- ds.add_case({'a'=>4,'b'=>5,'c'=>6})
222
- ds.add_case([[7,8,9],%w{a b c}])
223
- assert_equal({'a'=>1,'b'=>2,'c'=>3},ds.case_as_hash(0))
224
- assert_equal([4,5,6],ds.case_as_array(1))
225
- assert_equal([7,8,9],ds.case_as_array(2))
226
- assert_equal(['a','b','c'],ds.case_as_array(3))
227
- ds.add_case_array([6,7,1])
228
- ds.update_valid_data
229
- assert_equal([6,7,1],ds.case_as_array(4))
230
-
231
- end
232
- def test_marshaling
233
- ds_marshal=Marshal.load(Marshal.dump(@ds))
234
- assert_equal(ds_marshal,@ds)
235
- end
236
- def test_range
237
- v1=[1,2,3,4].to_vector
238
- v2=[5,6,7,8].to_vector
239
- v3=[9,10,11,12].to_vector
240
- ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2,'v3'=>v3}, %w{v3 v2 v1})
241
- assert_same(v1,ds1['v1'])
242
- ds2=ds1["v2".."v1"]
243
- assert_equal(%w{v2 v1},ds2.fields)
244
- assert_same(ds1['v1'],ds2['v1'])
245
- assert_same(ds1['v2'],ds2['v2'])
246
-
247
-
248
- end
249
- def test_dup
250
- v1=[1,2,3,4].to_vector
251
- v2=[5,6,7,8].to_vector
252
- ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2}, %w{v2 v1})
253
- ds2=ds1.dup
254
- assert_equal(ds1,ds2)
255
- assert_not_same(ds1,ds2)
256
- assert_equal(ds1['v1'],ds2['v1'])
257
- assert_not_same(ds1['v1'],ds2['v1'])
258
- assert_equal(ds1.fields,ds2.fields)
259
- assert_not_same(ds1.fields,ds2.fields)
260
- ds1['v1'].type=:scale
261
- # dup partial
262
- ds3=ds1.dup('v1')
263
- ds_exp=Statsample::Dataset.new({'v1'=>v1},%w{v1})
264
- assert_equal(ds_exp,ds3)
265
- assert_not_same(ds_exp,ds3)
266
- assert_equal(ds3['v1'],ds_exp['v1'])
267
- assert_not_same(ds3['v1'],ds_exp['v1'])
268
- assert_equal(ds3.fields,ds_exp.fields)
269
- assert_not_same(ds3.fields,ds_exp.fields)
270
-
271
-
272
- # empty
273
- ds3=ds1.dup_empty
274
- assert_not_equal(ds1,ds3)
275
- assert_not_equal(ds1['v1'],ds3['v1'])
276
- assert_equal([],ds3['v1'].data)
277
- assert_equal([],ds3['v2'].data)
278
- assert_equal(:scale,ds3['v1'].type)
279
- assert_equal(ds1.fields,ds2.fields)
280
- assert_not_same(ds1.fields,ds2.fields)
281
- end
282
- def test_from_to
283
- assert_equal(%w{name age city}, @ds.from_to("name","city"))
284
- assert_raise ArgumentError do
285
- @ds.from_to("name","a2")
286
- end
180
+
181
+ end
182
+ def test_delete_vector
183
+ @ds.delete_vector('name')
184
+ assert_equal(%w{id age city a1},@ds.fields)
185
+ assert_equal(%w{a1 age city id},@ds.vectors.keys.sort)
186
+ end
187
+ def test_change_type
188
+ @ds.col('age').type=:scale
189
+ assert_equal(:scale,@ds.col('age').type)
190
+ end
191
+ def test_split_by_separator_recode
192
+ @ds.add_vectors_by_split_recode("a1","_")
193
+ assert_equal(%w{id name age city a1 a1_1 a1_2 a1_3},@ds.fields)
194
+ assert_equal([1,0,1,nil,1],@ds.col('a1_1').to_a)
195
+ assert_equal([1,1,0,nil,1],@ds.col('a1_2').to_a)
196
+ assert_equal([0,1,0,nil,1],@ds.col('a1_3').to_a)
197
+ assert_equal({'a1_1'=>'a1:a', 'a1_2'=>'a1:b', 'a1_3'=>'a1:c'},@ds.labels)
198
+ end
199
+ def test_split_by_separator
200
+ @ds.add_vectors_by_split("a1","_")
201
+ assert_equal(%w{id name age city a1 a1_a a1_b a1_c},@ds.fields)
202
+ assert_equal([1,0,1,nil,1],@ds.col('a1_a').to_a)
203
+ assert_equal([1,1,0,nil,1],@ds.col('a1_b').to_a)
204
+ assert_equal([0,1,0,nil,1],@ds.col('a1_c').to_a)
205
+ end
206
+ def test_percentiles
207
+ v1=(1..100).to_a.to_scale
208
+ assert_equal(50.5,v1.median)
209
+ assert_equal(25.5, v1.percentil(25))
210
+ v2=(1..99).to_a.to_scale
211
+ assert_equal(50,v2.median)
212
+ assert_equal(25,v2.percentil(25))
213
+ v3=(1..50).to_a.to_scale
214
+ assert_equal(25.5, v3.median)
215
+ assert_equal(13, v3.percentil(25))
216
+
217
+ end
218
+ def test_add_case
219
+ ds=Statsample::Dataset.new({'a'=>[].to_vector, 'b'=>[].to_vector, 'c'=>[].to_vector})
220
+ ds.add_case([1,2,3])
221
+ ds.add_case({'a'=>4,'b'=>5,'c'=>6})
222
+ ds.add_case([[7,8,9],%w{a b c}])
223
+ assert_equal({'a'=>1,'b'=>2,'c'=>3},ds.case_as_hash(0))
224
+ assert_equal([4,5,6],ds.case_as_array(1))
225
+ assert_equal([7,8,9],ds.case_as_array(2))
226
+ assert_equal(['a','b','c'],ds.case_as_array(3))
227
+ ds.add_case_array([6,7,1])
228
+ ds.update_valid_data
229
+ assert_equal([6,7,1],ds.case_as_array(4))
230
+
231
+ end
232
+ def test_marshaling
233
+ ds_marshal=Marshal.load(Marshal.dump(@ds))
234
+ assert_equal(ds_marshal,@ds)
235
+ end
236
+ def test_range
237
+ v1=[1,2,3,4].to_vector
238
+ v2=[5,6,7,8].to_vector
239
+ v3=[9,10,11,12].to_vector
240
+ ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2,'v3'=>v3}, %w{v3 v2 v1})
241
+ assert_same(v1,ds1['v1'])
242
+ ds2=ds1["v2".."v1"]
243
+ assert_equal(%w{v2 v1},ds2.fields)
244
+ assert_same(ds1['v1'],ds2['v1'])
245
+ assert_same(ds1['v2'],ds2['v2'])
246
+
247
+
248
+ end
249
+ def test_dup
250
+ v1=[1,2,3,4].to_vector
251
+ v2=[5,6,7,8].to_vector
252
+ ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2}, %w{v2 v1})
253
+ ds2=ds1.dup
254
+ assert_equal(ds1,ds2)
255
+ assert_not_same(ds1,ds2)
256
+ assert_equal(ds1['v1'],ds2['v1'])
257
+ assert_not_same(ds1['v1'],ds2['v1'])
258
+ assert_equal(ds1.fields,ds2.fields)
259
+ assert_not_same(ds1.fields,ds2.fields)
260
+ ds1['v1'].type=:scale
261
+ # dup partial
262
+ ds3=ds1.dup('v1')
263
+ ds_exp=Statsample::Dataset.new({'v1'=>v1},%w{v1})
264
+ assert_equal(ds_exp,ds3)
265
+ assert_not_same(ds_exp,ds3)
266
+ assert_equal(ds3['v1'],ds_exp['v1'])
267
+ assert_not_same(ds3['v1'],ds_exp['v1'])
268
+ assert_equal(ds3.fields,ds_exp.fields)
269
+ assert_not_same(ds3.fields,ds_exp.fields)
270
+
271
+
272
+ # empty
273
+ ds3=ds1.dup_empty
274
+ assert_not_equal(ds1,ds3)
275
+ assert_not_equal(ds1['v1'],ds3['v1'])
276
+ assert_equal([],ds3['v1'].data)
277
+ assert_equal([],ds3['v2'].data)
278
+ assert_equal(:scale,ds3['v1'].type)
279
+ assert_equal(ds1.fields,ds2.fields)
280
+ assert_not_same(ds1.fields,ds2.fields)
281
+ end
282
+ def test_from_to
283
+ assert_equal(%w{name age city}, @ds.from_to("name","city"))
284
+ assert_raise ArgumentError do
285
+ @ds.from_to("name","a2")
287
286
  end
288
- def test_each_array_with_nils
289
- v1=[1,-99,3,4,"na"].to_vector(:scale,:missing_values=>[-99,"na"])
290
- v2=[5,6,-99,8,20].to_vector(:scale,:missing_values=>[-99])
291
- v3=[9,10,11,12,20].to_vector(:scale,:missing_values=>[-99])
292
- ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2,'v3'=>v3})
293
- ds2=ds1.dup_empty
294
- ds1.each_array_with_nils {|row|
295
- ds2.add_case_array(row)
296
- }
297
- ds2.update_valid_data
298
- assert_equal([1,nil,3,4,nil],ds2['v1'].data)
299
- assert_equal([5,6,nil,8,20],ds2['v2'].data)
300
- end
301
- def test_dup_only_valid
302
- v1=[1,nil,3,4].to_vector(:scale)
303
- v2=[5,6,nil,8].to_vector(:scale)
304
- v3=[9,10,11,12].to_vector(:scale)
287
+ end
288
+ def test_each_array_with_nils
289
+ v1=[1,-99,3,4,"na"].to_vector(:scale,:missing_values=>[-99,"na"])
290
+ v2=[5,6,-99,8,20].to_vector(:scale,:missing_values=>[-99])
291
+ v3=[9,10,11,12,20].to_vector(:scale,:missing_values=>[-99])
305
292
  ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2,'v3'=>v3})
306
- ds2=ds1.dup_only_valid
307
- expected=Statsample::Dataset.new({'v1'=>[1,4].to_vector(:scale), 'v2'=> [5,8].to_vector(:scale), 'v3'=>[9, 12].to_vector(:scale)})
308
- assert_equal(expected,ds2)
309
- assert_equal(expected.vectors.values,Statsample::only_valid(v1,v2,v3))
310
- end
311
- def test_filter
312
- @ds['age'].type=:scale
313
- filtered=@ds.filter{|c| c['id']==2 or c['id']==4}
314
- expected=Statsample::Dataset.new({'id' => Statsample::Vector.new([2,4]), 'name'=>Statsample::Vector.new(%w{Claude Franz}), 'age'=>Statsample::Vector.new([23,27],:scale),
315
- 'city'=>Statsample::Vector.new(['London','Paris']),
316
- 'a1'=>Statsample::Vector.new(['b,c',nil,])}, ['id','name','age','city','a1'])
317
- assert_equal(expected,filtered)
318
- end
319
- def test_filter_field
320
- @ds['age'].type=:scale
321
- filtered=@ds.filter_field('id') {|c| c['id']==2 or c['id']==4}
322
- expected=[2,4].to_vector
323
- assert_equal(expected,filtered)
324
-
325
- end
326
- def test_verify
327
- name=%w{r1 r2 r3 r4}.to_vector(:nominal)
328
- v1=[1,2,3,4].to_vector(:scale)
329
- v2=[4,3,2,1].to_vector(:scale)
330
- v3=[10,20,30,40].to_vector(:scale)
331
- v4=%w{a b a b}.to_vector(:nominal)
332
- ds={'v1'=>v1,'v2'=>v2,'v3'=>v3,'v4'=>v4,'id'=>name}.to_dataset
333
- ds.fields=%w{v1 v2 v3 v4 id}
334
- #Correct
335
- t1=create_test("If v4=a, v1 odd") {|r| r['v4']=='b' or (r['v4']=='a' and r['v1']%2==1)}
336
- t2=create_test("v3=v1*10") {|r| r['v3']==r['v1']*10}
337
- # Fail!
338
- t3=create_test("v4='b'") {|r| r['v4']=='b'}
339
- exp1=["1 [1]: v4='b'", "3 [3]: v4='b'"]
340
- exp2=["1 [r1]: v4='b'", "3 [r3]: v4='b'"]
341
- res=ds.verify(t3,t1,t2)
342
- assert_equal(exp1,res)
343
- res=ds.verify('id',t1,t2,t3)
344
- assert_equal(exp2,res)
345
- end
346
- def test_compute_operation
347
- v1=[1,2,3,4].to_vector(:scale)
348
- v2=[4,3,2,1].to_vector(:scale)
349
- v3=[10,20,30,40].to_vector(:scale)
350
- vscale=[1.quo(2),1,3.quo(2),2].to_vector(:scale)
351
- vsum=[1+4+10.0,2+3+20.0,3+2+30.0,4+1+40.0].to_vector(:scale)
352
- vmult=[1*4,2*3,3*2,4*1].to_vector(:scale)
353
- ds={'v1'=>v1,'v2'=>v2,'v3'=>v3}.to_dataset
354
- assert_equal(vscale,ds.compute("v1/2"))
355
- assert_equal(vsum,ds.compute("v1+v2+v3"))
356
- assert_equal(vmult,ds.compute("v1*v2"))
357
-
358
- end
359
- def test_crosstab_with_asignation
360
- v1=%w{a a a b b b c c c}.to_vector
361
- v2=%w{a b c a b c a b c}.to_vector
362
- v3=%w{0 1 0 0 1 1 0 0 1}.to_scale
363
- ds=Statsample::Dataset.crosstab_by_asignation(v1,v2,v3)
364
- assert_equal(:nominal, ds['_id'].type)
365
- assert_equal(:scale, ds['a'].type)
366
- assert_equal(:scale, ds['b'].type)
367
- ev_id=%w{a b c}.to_vector
368
- ev_a =%w{0 0 0}.to_scale
369
- ev_b =%w{1 1 0}.to_scale
370
- ev_c =%w{0 1 1}.to_scale
371
- ds2={'_id'=>ev_id, 'a'=>ev_a, 'b'=>ev_b, 'c'=>ev_c}.to_dataset
372
- assert_equal(ds, ds2)
373
- end
374
- def test_one_to_many
375
- cases=[
376
- ['1','george','red',10,'blue',20,nil,nil],
377
- ['2','fred','green',15,'orange',30,'white',20],
378
- ['3','alfred',nil,nil,nil,nil,nil,nil]
379
- ]
380
- ds=Statsample::Dataset.new(%w{id name car_color1 car_value1 car_color2 car_value2 car_color3 car_value3})
381
- cases.each {|c| ds.add_case_array c }
382
- ds.update_valid_data
383
- ids=%w{1 1 2 2 2}.to_vector
384
- colors=%w{red blue green orange white}.to_vector
385
- values=[10,20,15,30,20].to_vector
386
- col_ids=[1,2,1,2,3].to_scale
387
- ds_expected={'id'=>ids, '_col_id'=>col_ids, 'color'=>colors, 'value'=>values}.to_dataset(['id','_col_id', 'color','value'])
388
- assert_equal(ds_expected, ds.one_to_many(%w{id}, "car_%v%n"))
389
-
293
+ ds2=ds1.dup_empty
294
+ ds1.each_array_with_nils {|row|
295
+ ds2.add_case_array(row)
296
+ }
297
+ ds2.update_valid_data
298
+ assert_equal([1,nil,3,4,nil],ds2['v1'].data)
299
+ assert_equal([5,6,nil,8,20],ds2['v2'].data)
390
300
  end
301
+ def test_dup_only_valid
302
+ v1=[1,nil,3,4].to_vector(:scale)
303
+ v2=[5,6,nil,8].to_vector(:scale)
304
+ v3=[9,10,11,12].to_vector(:scale)
305
+ ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2,'v3'=>v3})
306
+ ds2=ds1.dup_only_valid
307
+ expected=Statsample::Dataset.new({'v1'=>[1,4].to_vector(:scale), 'v2'=> [5,8].to_vector(:scale), 'v3'=>[9, 12].to_vector(:scale)})
308
+ assert_equal(expected,ds2)
309
+ assert_equal(expected.vectors.values,Statsample::only_valid(v1,v2,v3))
310
+ end
311
+ def test_filter
312
+ @ds['age'].type=:scale
313
+ filtered=@ds.filter{|c| c['id']==2 or c['id']==4}
314
+ expected=Statsample::Dataset.new({'id' => Statsample::Vector.new([2,4]), 'name'=>Statsample::Vector.new(%w{Claude Franz}), 'age'=>Statsample::Vector.new([23,27],:scale),
315
+ 'city'=>Statsample::Vector.new(['London','Paris']),
316
+ 'a1'=>Statsample::Vector.new(['b,c',nil,])}, ['id','name','age','city','a1'])
317
+ assert_equal(expected,filtered)
318
+ end
319
+ def test_filter_field
320
+ @ds['age'].type=:scale
321
+ filtered=@ds.filter_field('id') {|c| c['id']==2 or c['id']==4}
322
+ expected=[2,4].to_vector
323
+ assert_equal(expected,filtered)
324
+
325
+ end
326
+ def test_verify
327
+ name=%w{r1 r2 r3 r4}.to_vector(:nominal)
328
+ v1=[1,2,3,4].to_vector(:scale)
329
+ v2=[4,3,2,1].to_vector(:scale)
330
+ v3=[10,20,30,40].to_vector(:scale)
331
+ v4=%w{a b a b}.to_vector(:nominal)
332
+ ds={'v1'=>v1,'v2'=>v2,'v3'=>v3,'v4'=>v4,'id'=>name}.to_dataset
333
+ ds.fields=%w{v1 v2 v3 v4 id}
334
+ #Correct
335
+ t1=create_test("If v4=a, v1 odd") {|r| r['v4']=='b' or (r['v4']=='a' and r['v1']%2==1)}
336
+ t2=create_test("v3=v1*10") {|r| r['v3']==r['v1']*10}
337
+ # Fail!
338
+ t3=create_test("v4='b'") {|r| r['v4']=='b'}
339
+ exp1=["1 [1]: v4='b'", "3 [3]: v4='b'"]
340
+ exp2=["1 [r1]: v4='b'", "3 [r3]: v4='b'"]
341
+ res=ds.verify(t3,t1,t2)
342
+ assert_equal(exp1,res)
343
+ res=ds.verify('id',t1,t2,t3)
344
+ assert_equal(exp2,res)
345
+ end
346
+ def test_compute_operation
347
+ v1=[1,2,3,4].to_vector(:scale)
348
+ v2=[4,3,2,1].to_vector(:scale)
349
+ v3=[10,20,30,40].to_vector(:scale)
350
+ vscale=[1.quo(2),1,3.quo(2),2].to_vector(:scale)
351
+ vsum=[1+4+10.0,2+3+20.0,3+2+30.0,4+1+40.0].to_vector(:scale)
352
+ vmult=[1*4,2*3,3*2,4*1].to_vector(:scale)
353
+ ds={'v1'=>v1,'v2'=>v2,'v3'=>v3}.to_dataset
354
+ assert_equal(vscale,ds.compute("v1/2"))
355
+ assert_equal(vsum,ds.compute("v1+v2+v3"))
356
+ assert_equal(vmult,ds.compute("v1*v2"))
357
+
358
+ end
359
+ def test_crosstab_with_asignation
360
+ v1=%w{a a a b b b c c c}.to_vector
361
+ v2=%w{a b c a b c a b c}.to_vector
362
+ v3=%w{0 1 0 0 1 1 0 0 1}.to_scale
363
+ ds=Statsample::Dataset.crosstab_by_asignation(v1,v2,v3)
364
+ assert_equal(:nominal, ds['_id'].type)
365
+ assert_equal(:scale, ds['a'].type)
366
+ assert_equal(:scale, ds['b'].type)
367
+ ev_id=%w{a b c}.to_vector
368
+ ev_a =%w{0 0 0}.to_scale
369
+ ev_b =%w{1 1 0}.to_scale
370
+ ev_c =%w{0 1 1}.to_scale
371
+ ds2={'_id'=>ev_id, 'a'=>ev_a, 'b'=>ev_b, 'c'=>ev_c}.to_dataset
372
+ assert_equal(ds, ds2)
373
+ end
374
+ def test_one_to_many
375
+ cases=[
376
+ ['1','george','red',10,'blue',20,nil,nil],
377
+ ['2','fred','green',15,'orange',30,'white',20],
378
+ ['3','alfred',nil,nil,nil,nil,nil,nil]
379
+ ]
380
+ ds=Statsample::Dataset.new(%w{id name car_color1 car_value1 car_color2 car_value2 car_color3 car_value3})
381
+ cases.each {|c| ds.add_case_array c }
382
+ ds.update_valid_data
383
+ ids=%w{1 1 2 2 2}.to_vector
384
+ colors=%w{red blue green orange white}.to_vector
385
+ values=[10,20,15,30,20].to_vector
386
+ col_ids=[1,2,1,2,3].to_scale
387
+ ds_expected={'id'=>ids, '_col_id'=>col_ids, 'color'=>colors, 'value'=>values}.to_dataset(['id','_col_id', 'color','value'])
388
+ assert_equal(ds_expected, ds.one_to_many(%w{id}, "car_%v%n"))
389
+
390
+ end
391
391
 
392
392
  end