statsample 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +7 -0
- data/Manifest.txt +15 -9
- data/README.txt +6 -0
- data/Rakefile +8 -0
- data/{demo → examples}/correlation_matrix.rb +0 -0
- data/{demo/dominanceanalysis.rb → examples/dominance_analysis.rb} +0 -0
- data/{demo → examples}/dominance_analysis_bootstrap.rb +0 -0
- data/{demo → examples}/levene.rb +0 -0
- data/{demo → examples}/multiple_regression.rb +5 -3
- data/{demo → examples}/multivariate_correlation.rb +0 -0
- data/{demo → examples}/polychoric.rb +0 -0
- data/{demo → examples}/principal_axis.rb +0 -0
- data/examples/t_test.rb +11 -0
- data/{demo → examples}/tetrachoric.rb +0 -0
- data/lib/statistics2.rb +1 -1
- data/lib/statsample.rb +57 -6
- data/lib/statsample/bivariate/polychoric.rb +12 -25
- data/lib/statsample/bivariate/tetrachoric.rb +1 -3
- data/lib/statsample/converter/csv.rb +11 -12
- data/lib/statsample/dominanceanalysis/bootstrap.rb +2 -3
- data/lib/statsample/factor/principalaxis.rb +0 -2
- data/lib/statsample/factor/rotation.rb +6 -8
- data/lib/statsample/graph.rb +8 -0
- data/lib/statsample/graph/svggraph.rb +0 -4
- data/lib/statsample/regression/multiple/baseengine.rb +25 -28
- data/lib/statsample/regression/multiple/matrixengine.rb +30 -34
- data/lib/statsample/test.rb +36 -1
- data/lib/statsample/test/levene.rb +11 -7
- data/lib/statsample/test/t.rb +189 -0
- data/test/test_anova.rb +8 -10
- data/test/test_bivariate.rb +40 -37
- data/test/test_codification.rb +9 -13
- data/test/test_combination.rb +37 -39
- data/test/test_crosstab.rb +46 -48
- data/test/test_csv.rb +40 -45
- data/test/test_dataset.rb +150 -152
- data/test/test_distribution.rb +24 -21
- data/test/test_dominance_analysis.rb +10 -12
- data/test/test_factor.rb +95 -91
- data/test/test_ggobi.rb +30 -33
- data/test/test_gsl.rb +4 -4
- data/test/test_helpers.rb +26 -0
- data/test/test_histogram.rb +5 -6
- data/test/test_logit.rb +20 -21
- data/test/test_matrix.rb +47 -48
- data/test/test_mle.rb +130 -131
- data/test/test_multiset.rb +95 -96
- data/test/test_permutation.rb +35 -36
- data/test/test_promise_after.rb +39 -0
- data/test/test_regression.rb +49 -51
- data/test/test_reliability.rb +29 -30
- data/test/test_resample.rb +22 -23
- data/test/test_srs.rb +8 -9
- data/test/test_statistics.rb +12 -6
- data/test/test_stest.rb +18 -10
- data/test/test_stratified.rb +15 -16
- data/test/test_svg_graph.rb +11 -22
- data/test/test_test_t.rb +40 -0
- data/test/test_umannwhitney.rb +14 -15
- data/test/test_vector.rb +33 -37
- data/test/test_xls.rb +34 -41
- metadata +22 -11
data/test/test_dataset.rb
CHANGED
@@ -1,11 +1,9 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
require 'tempfile'
|
5
|
-
class StatsampleDatasetTestCase < Test::Unit::TestCase
|
1
|
+
require(File.dirname(__FILE__)+'/test_helpers.rb')
|
2
|
+
|
3
|
+
class StatsampleDatasetTestCase < MiniTest::Unit::TestCase
|
6
4
|
def setup
|
7
5
|
@ds=Statsample::Dataset.new({'id' => Statsample::Vector.new([1,2,3,4,5]), 'name'=>Statsample::Vector.new(%w{Alex Claude Peter Franz George}), 'age'=>Statsample::Vector.new([20,23,25,27,5]),
|
8
|
-
|
6
|
+
'city'=>Statsample::Vector.new(['New York','London','London','Paris','Tome']),
|
9
7
|
'a1'=>Statsample::Vector.new(['a,b','b,c','a',nil,'a,b,c'])}, ['id','name','age','city','a1'])
|
10
8
|
end
|
11
9
|
def test_basic
|
@@ -14,40 +12,40 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
|
|
14
12
|
end
|
15
13
|
def test_saveload
|
16
14
|
outfile=Tempfile.new("dataset.ds")
|
17
|
-
|
18
|
-
|
19
|
-
|
15
|
+
@ds.save(outfile.path)
|
16
|
+
a=Statsample.load(outfile.path)
|
17
|
+
assert_equal(@ds,a)
|
20
18
|
end
|
21
|
-
|
19
|
+
|
22
20
|
def test_matrix
|
23
21
|
matrix=Matrix[[1,2],[3,4],[5,6]]
|
24
22
|
ds=Statsample::Dataset.new('v1'=>[1,3,5].to_vector,'v2'=>[2,4,6].to_vector)
|
25
23
|
assert_equal(matrix,ds.to_matrix)
|
26
24
|
end
|
27
|
-
|
25
|
+
|
28
26
|
def test_fields
|
29
27
|
@ds.fields=%w{name a1 id age city}
|
30
28
|
assert_equal(%w{name a1 id age city}, @ds.fields)
|
31
29
|
@ds.fields=%w{id name age}
|
32
|
-
assert_equal(%w{id name age a1 city}, @ds.fields)
|
30
|
+
assert_equal(%w{id name age a1 city}, @ds.fields)
|
33
31
|
end
|
34
32
|
def test_merge
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
33
|
+
a=[1,2,3].to_scale
|
34
|
+
b=[3,4,5].to_vector
|
35
|
+
c=[4,5,6].to_scale
|
36
|
+
d=[7,8,9].to_vector
|
37
|
+
e=[10,20,30].to_vector
|
38
|
+
ds1={'a'=>a,'b'=>b}.to_dataset
|
39
|
+
ds2={'c'=>c,'d'=>d}.to_dataset
|
40
|
+
exp={'a'=>a,'b'=>b,'c'=>c,'d'=>d}.to_dataset
|
41
|
+
|
42
|
+
assert_equal(exp,ds1.merge(ds2))
|
43
|
+
exp.fields=%w{c d a b}
|
44
|
+
assert_equal(exp,ds2.merge(ds1))
|
45
|
+
ds3={'a'=>e}.to_dataset
|
46
|
+
exp={'a_1'=>a,'b'=>b,'a_2'=>e}.to_dataset
|
47
|
+
exp.fields=%w{a_1 b a_2}
|
48
|
+
assert_equal(exp,ds1.merge(ds3))
|
51
49
|
end
|
52
50
|
def test_each_vector
|
53
51
|
a=[1,2,3].to_vector
|
@@ -56,13 +54,13 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
|
|
56
54
|
ds=Statsample::Dataset.new({'a'=>a,'b'=>b},fields)
|
57
55
|
res=[]
|
58
56
|
ds.each_vector{|k,v|
|
59
|
-
|
57
|
+
res.push([k,v])
|
60
58
|
}
|
61
59
|
assert_equal([["a",a],["b",b]],res)
|
62
60
|
ds.fields=["b","a"]
|
63
61
|
res=[]
|
64
62
|
ds.each_vector{|k,v|
|
65
|
-
|
63
|
+
res.push([k,v])
|
66
64
|
}
|
67
65
|
assert_equal([["b",b],["a",a]],res)
|
68
66
|
end
|
@@ -82,84 +80,84 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
|
|
82
80
|
@ds.add_vector('new',v)
|
83
81
|
assert_equal(%w{id name age city a1 new},@ds.fields)
|
84
82
|
x=Statsample::Vector.new(%w{a b c d e f g})
|
85
|
-
assert_raise ArgumentError do
|
86
|
-
|
83
|
+
assert_raise ArgumentError do
|
84
|
+
@ds.add_vector('new2',x)
|
87
85
|
end
|
88
86
|
end
|
89
87
|
def test_vector_by_calculation
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
88
|
+
a1=[1,2,3,4,5,6,7].to_vector(:scale)
|
89
|
+
a2=[10,20,30,40,50,60,70].to_vector(:scale)
|
90
|
+
a3=[100,200,300,400,500,600,700].to_vector(:scale)
|
91
|
+
ds={'a1'=>a1,'a2'=>a2,'a3'=>a3}.to_dataset
|
92
|
+
total=ds.vector_by_calculation() {|row|
|
93
|
+
row['a1']+row['a2']+row['a3']
|
94
|
+
}
|
95
|
+
expected=[111,222,333,444,555,666,777].to_vector(:scale)
|
96
|
+
assert_equal(expected,total)
|
99
97
|
end
|
100
98
|
def test_vector_sum
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
99
|
+
a1=[1 ,2 ,3 ,4 , 5,nil].to_vector(:scale)
|
100
|
+
a2=[10 ,10,20,20 ,20,30].to_vector(:scale)
|
101
|
+
b1=[nil,1 ,1 ,1 ,1 ,2].to_vector(:scale)
|
102
|
+
b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
|
103
|
+
ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2}.to_dataset
|
104
|
+
total=ds.vector_sum
|
105
|
+
a=ds.vector_sum(['a1','a2'])
|
106
|
+
b=ds.vector_sum(['b1','b2'])
|
107
|
+
expected_a=[11,12,23,24,25,nil].to_vector(:scale)
|
108
|
+
expected_b=[nil,3,3,nil,3,5].to_vector(:scale)
|
109
|
+
expected_total=[nil,15,26,nil,28,nil].to_vector(:scale)
|
110
|
+
assert_equal(expected_a, a)
|
111
|
+
assert_equal(expected_b, b)
|
112
|
+
assert_equal(expected_total, total)
|
115
113
|
end
|
116
114
|
def test_vector_missing_values
|
117
115
|
a1=[1 ,nil ,3 ,4 , 5,nil].to_vector(:scale)
|
118
|
-
|
119
|
-
|
120
|
-
|
116
|
+
a2=[10 ,nil ,20,20 ,20,30].to_vector(:scale)
|
117
|
+
b1=[nil,nil ,1 ,1 ,1 ,2].to_vector(:scale)
|
118
|
+
b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
|
121
119
|
c= [nil,2 , 4,2 ,2 ,2].to_vector(:scale)
|
122
|
-
|
120
|
+
ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
|
123
121
|
mva=[2,3,0,1,0,1].to_vector(:scale)
|
124
122
|
assert_equal(mva,ds.vector_missing_values)
|
125
123
|
end
|
126
124
|
def test_vector_count_characters
|
127
125
|
a1=[1 ,"abcde" ,3 ,4 , 5,nil].to_vector(:scale)
|
128
|
-
|
129
|
-
|
130
|
-
|
126
|
+
a2=[10 ,20.3 ,20 ,20 ,20,30].to_vector(:scale)
|
127
|
+
b1=[nil,"343434" ,1 ,1 ,1 ,2].to_vector(:scale)
|
128
|
+
b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
|
131
129
|
c= [nil,2 ,"This is a nice example",2 ,2 ,2].to_vector(:scale)
|
132
|
-
|
130
|
+
ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
|
133
131
|
exp=[4,17,27,5,6,5].to_vector(:scale)
|
134
132
|
assert_equal(exp,ds.vector_count_characters)
|
135
|
-
|
133
|
+
|
136
134
|
end
|
137
135
|
def test_vector_mean
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
136
|
+
a1=[1 ,2 ,3 ,4 , 5,nil].to_vector(:scale)
|
137
|
+
a2=[10 ,10,20,20 ,20,30].to_vector(:scale)
|
138
|
+
b1=[nil,1 ,1 ,1 ,1 ,2].to_vector(:scale)
|
139
|
+
b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
|
142
140
|
c= [nil,2, 4,2 ,2 ,2].to_vector(:scale)
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
141
|
+
ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
|
142
|
+
total=ds.vector_mean
|
143
|
+
a=ds.vector_mean(['a1','a2'],1)
|
144
|
+
b=ds.vector_mean(['b1','b2'],1)
|
147
145
|
c=ds.vector_mean(['b1','b2','c'],1)
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
end
|
157
|
-
|
146
|
+
expected_a=[5.5,6,11.5,12,12.5,30].to_vector(:scale)
|
147
|
+
expected_b=[2,1.5,1.5,1,1.5,2.5].to_vector(:scale)
|
148
|
+
expected_c=[nil, 5.0/3,7.0/3,1.5,5.0/3,7.0/3].to_vector(:scale)
|
149
|
+
expected_total=[nil,3.4,6,nil,6.0,nil].to_vector(:scale)
|
150
|
+
assert_equal(expected_a, a)
|
151
|
+
assert_equal(expected_b, b)
|
152
|
+
assert_equal(expected_c, c)
|
153
|
+
assert_equal(expected_total, total)
|
154
|
+
end
|
155
|
+
|
158
156
|
def test_each_array
|
159
157
|
expected=[[1,'Alex',20,'New York','a,b'], [2,'Claude',23,'London','b,c'], [3,'Peter',25,'London','a'],[4,'Franz', 27,'Paris',nil],[5,'George',5,'Tome','a,b,c']]
|
160
158
|
out=[]
|
161
159
|
@ds.each_array{ |a|
|
162
|
-
|
160
|
+
out.push(a)
|
163
161
|
}
|
164
162
|
assert_equal(expected,out)
|
165
163
|
end
|
@@ -175,9 +173,9 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
|
|
175
173
|
# Native methods
|
176
174
|
assert_equal({'id'=>1,'name'=>'Alex','city'=>'New York','age'=>20,'a1'=>'a,b'},@ds._case_as_hash(0))
|
177
175
|
assert_equal([5,'George',5,'Tome','a,b,c'],@ds._case_as_array(4))
|
178
|
-
|
179
|
-
|
180
|
-
|
176
|
+
|
177
|
+
|
178
|
+
|
181
179
|
end
|
182
180
|
def test_delete_vector
|
183
181
|
@ds.delete_vector('name')
|
@@ -204,16 +202,16 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
|
|
204
202
|
assert_equal([0,1,0,nil,1],@ds.col('a1_c').to_a)
|
205
203
|
end
|
206
204
|
def test_percentiles
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
205
|
+
v1=(1..100).to_a.to_scale
|
206
|
+
assert_equal(50.5,v1.median)
|
207
|
+
assert_equal(25.5, v1.percentil(25))
|
208
|
+
v2=(1..99).to_a.to_scale
|
209
|
+
assert_equal(50,v2.median)
|
210
|
+
assert_equal(25,v2.percentil(25))
|
211
|
+
v3=(1..50).to_a.to_scale
|
212
|
+
assert_equal(25.5, v3.median)
|
213
|
+
assert_equal(13, v3.percentil(25))
|
214
|
+
|
217
215
|
end
|
218
216
|
def test_add_case
|
219
217
|
ds=Statsample::Dataset.new({'a'=>[].to_vector, 'b'=>[].to_vector, 'c'=>[].to_vector})
|
@@ -227,7 +225,7 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
|
|
227
225
|
ds.add_case_array([6,7,1])
|
228
226
|
ds.update_valid_data
|
229
227
|
assert_equal([6,7,1],ds.case_as_array(4))
|
230
|
-
|
228
|
+
|
231
229
|
end
|
232
230
|
def test_marshaling
|
233
231
|
ds_marshal=Marshal.load(Marshal.dump(@ds))
|
@@ -243,8 +241,8 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
|
|
243
241
|
assert_equal(%w{v2 v1},ds2.fields)
|
244
242
|
assert_same(ds1['v1'],ds2['v1'])
|
245
243
|
assert_same(ds1['v2'],ds2['v2'])
|
246
|
-
|
247
|
-
|
244
|
+
|
245
|
+
|
248
246
|
end
|
249
247
|
def test_dup
|
250
248
|
v1=[1,2,3,4].to_vector
|
@@ -267,8 +265,8 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
|
|
267
265
|
assert_not_same(ds3['v1'],ds_exp['v1'])
|
268
266
|
assert_equal(ds3.fields,ds_exp.fields)
|
269
267
|
assert_not_same(ds3.fields,ds_exp.fields)
|
270
|
-
|
271
|
-
|
268
|
+
|
269
|
+
|
272
270
|
# empty
|
273
271
|
ds3=ds1.dup_empty
|
274
272
|
assert_not_equal(ds1,ds3)
|
@@ -282,22 +280,22 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
|
|
282
280
|
def test_from_to
|
283
281
|
assert_equal(%w{name age city}, @ds.from_to("name","city"))
|
284
282
|
assert_raise ArgumentError do
|
285
|
-
|
283
|
+
@ds.from_to("name","a2")
|
286
284
|
end
|
287
285
|
end
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
286
|
+
def test_each_array_with_nils
|
287
|
+
v1=[1,-99,3,4,"na"].to_vector(:scale,:missing_values=>[-99,"na"])
|
288
|
+
v2=[5,6,-99,8,20].to_vector(:scale,:missing_values=>[-99])
|
289
|
+
v3=[9,10,11,12,20].to_vector(:scale,:missing_values=>[-99])
|
290
|
+
ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2,'v3'=>v3})
|
291
|
+
ds2=ds1.dup_empty
|
292
|
+
ds1.each_array_with_nils {|row|
|
293
|
+
ds2.add_case_array(row)
|
294
|
+
}
|
295
|
+
ds2.update_valid_data
|
296
|
+
assert_equal([1,nil,3,4,nil],ds2['v1'].data)
|
297
|
+
assert_equal([5,6,nil,8,20],ds2['v2'].data)
|
298
|
+
end
|
301
299
|
def test_dup_only_valid
|
302
300
|
v1=[1,nil,3,4].to_vector(:scale)
|
303
301
|
v2=[5,6,nil,8].to_vector(:scale)
|
@@ -306,22 +304,22 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
|
|
306
304
|
ds2=ds1.dup_only_valid
|
307
305
|
expected=Statsample::Dataset.new({'v1'=>[1,4].to_vector(:scale), 'v2'=> [5,8].to_vector(:scale), 'v3'=>[9, 12].to_vector(:scale)})
|
308
306
|
assert_equal(expected,ds2)
|
309
|
-
|
307
|
+
assert_equal(expected.vectors.values,Statsample::only_valid(v1,v2,v3))
|
310
308
|
end
|
311
309
|
def test_filter
|
312
310
|
@ds['age'].type=:scale
|
313
311
|
filtered=@ds.filter{|c| c['id']==2 or c['id']==4}
|
314
312
|
expected=Statsample::Dataset.new({'id' => Statsample::Vector.new([2,4]), 'name'=>Statsample::Vector.new(%w{Claude Franz}), 'age'=>Statsample::Vector.new([23,27],:scale),
|
315
|
-
|
313
|
+
'city'=>Statsample::Vector.new(['London','Paris']),
|
316
314
|
'a1'=>Statsample::Vector.new(['b,c',nil,])}, ['id','name','age','city','a1'])
|
317
315
|
assert_equal(expected,filtered)
|
318
|
-
end
|
316
|
+
end
|
319
317
|
def test_filter_field
|
320
|
-
|
318
|
+
@ds['age'].type=:scale
|
321
319
|
filtered=@ds.filter_field('id') {|c| c['id']==2 or c['id']==4}
|
322
320
|
expected=[2,4].to_vector
|
323
321
|
assert_equal(expected,filtered)
|
324
|
-
|
322
|
+
|
325
323
|
end
|
326
324
|
def test_verify
|
327
325
|
name=%w{r1 r2 r3 r4}.to_vector(:nominal)
|
@@ -351,42 +349,42 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
|
|
351
349
|
vsum=[1+4+10.0,2+3+20.0,3+2+30.0,4+1+40.0].to_vector(:scale)
|
352
350
|
vmult=[1*4,2*3,3*2,4*1].to_vector(:scale)
|
353
351
|
ds={'v1'=>v1,'v2'=>v2,'v3'=>v3}.to_dataset
|
354
|
-
assert_equal(vscale,ds.compute("v1/2"))
|
352
|
+
assert_equal(vscale,ds.compute("v1/2"))
|
355
353
|
assert_equal(vsum,ds.compute("v1+v2+v3"))
|
356
354
|
assert_equal(vmult,ds.compute("v1*v2"))
|
357
|
-
|
355
|
+
|
358
356
|
end
|
359
357
|
def test_crosstab_with_asignation
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
358
|
+
v1=%w{a a a b b b c c c}.to_vector
|
359
|
+
v2=%w{a b c a b c a b c}.to_vector
|
360
|
+
v3=%w{0 1 0 0 1 1 0 0 1}.to_scale
|
361
|
+
ds=Statsample::Dataset.crosstab_by_asignation(v1,v2,v3)
|
362
|
+
assert_equal(:nominal, ds['_id'].type)
|
363
|
+
assert_equal(:scale, ds['a'].type)
|
364
|
+
assert_equal(:scale, ds['b'].type)
|
365
|
+
ev_id=%w{a b c}.to_vector
|
366
|
+
ev_a =%w{0 0 0}.to_scale
|
367
|
+
ev_b =%w{1 1 0}.to_scale
|
368
|
+
ev_c =%w{0 1 1}.to_scale
|
369
|
+
ds2={'_id'=>ev_id, 'a'=>ev_a, 'b'=>ev_b, 'c'=>ev_c}.to_dataset
|
370
|
+
assert_equal(ds, ds2)
|
373
371
|
end
|
374
372
|
def test_one_to_many
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
end
|
391
|
-
|
373
|
+
cases=[
|
374
|
+
['1','george','red',10,'blue',20,nil,nil],
|
375
|
+
['2','fred','green',15,'orange',30,'white',20],
|
376
|
+
['3','alfred',nil,nil,nil,nil,nil,nil]
|
377
|
+
]
|
378
|
+
ds=Statsample::Dataset.new(%w{id name car_color1 car_value1 car_color2 car_value2 car_color3 car_value3})
|
379
|
+
cases.each {|c| ds.add_case_array c }
|
380
|
+
ds.update_valid_data
|
381
|
+
ids=%w{1 1 2 2 2}.to_vector
|
382
|
+
colors=%w{red blue green orange white}.to_vector
|
383
|
+
values=[10,20,15,30,20].to_vector
|
384
|
+
col_ids=[1,2,1,2,3].to_scale
|
385
|
+
ds_expected={'id'=>ids, '_col_id'=>col_ids, 'color'=>colors, 'value'=>values}.to_dataset(['id','_col_id', 'color','value'])
|
386
|
+
assert_equal(ds_expected, ds.one_to_many(%w{id}, "car_%v%n"))
|
387
|
+
|
388
|
+
end
|
389
|
+
|
392
390
|
end
|
data/test/test_distribution.rb
CHANGED
@@ -1,13 +1,16 @@
|
|
1
|
-
|
1
|
+
require(File.dirname(__FILE__)+'/test_helpers.rb')
|
2
|
+
|
2
3
|
require 'distribution'
|
3
|
-
|
4
|
+
|
4
5
|
begin
|
5
|
-
|
6
|
-
|
6
|
+
require 'rbgsl'
|
7
|
+
NOT_GSL=false
|
7
8
|
rescue LoadError
|
8
|
-
|
9
|
+
NOT_GSL=true
|
9
10
|
end
|
10
|
-
|
11
|
+
|
12
|
+
|
13
|
+
class DistributionTestCase < MiniTest::Unit::TestCase
|
11
14
|
def test_chi
|
12
15
|
if !NOT_GSL
|
13
16
|
[2,3,4,5].each{|k|
|
@@ -25,7 +28,7 @@ class DistributionTestCase < Test::Unit::TestCase
|
|
25
28
|
area=Distribution::T.cdf(t,n)
|
26
29
|
assert_in_delta(area, GSL::Cdf.tdist_P(t,n),0.0001)
|
27
30
|
assert_in_delta(Distribution::T.p_value(area,n), GSL::Cdf.tdist_Pinv(area,n),0.0001)
|
28
|
-
|
31
|
+
|
29
32
|
}
|
30
33
|
}
|
31
34
|
end
|
@@ -44,34 +47,34 @@ class DistributionTestCase < Test::Unit::TestCase
|
|
44
47
|
if !NOT_GSL
|
45
48
|
[0.2,0.4,0.6,0.8,0.9, 0.99,0.999,0.999999].each {|rho|
|
46
49
|
assert_equal(GSL::Ran::bivariate_gaussian_pdf(0, 0, 1,1,rho), Distribution::NormalBivariate.pdf(0,0, rho , 1,1))
|
47
|
-
|
50
|
+
|
48
51
|
}
|
49
52
|
end
|
50
|
-
|
53
|
+
|
51
54
|
[-3,-2,-1,0,1,1.5].each {|x|
|
52
55
|
assert_in_delta(Distribution::NormalBivariate.cdf_hull(x,x,0.5), Distribution::NormalBivariate.cdf_genz(x,x,0.5), 0.001)
|
53
56
|
assert_in_delta(Distribution::NormalBivariate.cdf_genz(x,x,0.5), Distribution::NormalBivariate.cdf_jantaravareerat(x,x,0.5), 0.001)
|
54
57
|
}
|
55
|
-
|
58
|
+
|
56
59
|
assert_in_delta(0.686, Distribution::NormalBivariate.cdf(2,0.5,0.5), 0.001)
|
57
60
|
assert_in_delta(0.498, Distribution::NormalBivariate.cdf(2,0.0,0.5), 0.001)
|
58
61
|
assert_in_delta(0.671, Distribution::NormalBivariate.cdf(1.5,0.5,0.5), 0.001)
|
59
|
-
|
62
|
+
|
60
63
|
assert_in_delta(Distribution::Normal.cdf(0), Distribution::NormalBivariate.cdf(10,0,0.9), 0.001)
|
61
64
|
end
|
62
65
|
def test_f
|
63
66
|
if !NOT_GSL
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
}
|
67
|
+
[0.1,0.5,1,2,10,20,30].each{|f|
|
68
|
+
[2,5,10].each{|n2|
|
69
|
+
[2,5,10].each{|n1|
|
70
|
+
area=Distribution::F.cdf(f,n1,n2)
|
71
|
+
assert_in_delta(area, GSL::Cdf.fdist_P(f,n1,n2),0.0001)
|
72
|
+
assert_in_delta(Distribution::F.p_value(area,n1,n2), GSL::Cdf.fdist_Pinv(area,n1,n2),0.0001)
|
73
|
+
|
74
|
+
}
|
73
75
|
}
|
76
|
+
}
|
74
77
|
end
|
75
78
|
end
|
76
79
|
|
77
|
-
end
|
80
|
+
end
|