statsample 1.4.1 → 1.4.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +4 -3
- data/History.txt +4 -0
- data/README.md +4 -0
- data/lib/statsample/converter/csv.rb +41 -54
- data/lib/statsample/converters.rb +18 -19
- data/lib/statsample/version.rb +1 -1
- data/test/fixtures/scientific_notation.csv +4 -0
- data/test/helpers_tests.rb +37 -38
- data/test/test_analysis.rb +96 -97
- data/test/test_anova_contrast.rb +22 -22
- data/test/test_anovaoneway.rb +12 -12
- data/test/test_anovatwoway.rb +16 -17
- data/test/test_anovatwowaywithdataset.rb +22 -24
- data/test/test_anovawithvectors.rb +67 -69
- data/test/test_awesome_print_bug.rb +9 -9
- data/test/test_bartlettsphericity.rb +13 -13
- data/test/test_bivariate.rb +122 -126
- data/test/test_codification.rb +51 -49
- data/test/test_crosstab.rb +44 -40
- data/test/test_csv.rb +52 -70
- data/test/test_dataset.rb +347 -330
- data/test/test_dominance_analysis.rb +22 -24
- data/test/test_factor.rb +163 -166
- data/test/test_factor_map.rb +25 -30
- data/test/test_factor_pa.rb +28 -28
- data/test/test_ggobi.rb +19 -18
- data/test/test_gsl.rb +13 -15
- data/test/test_histogram.rb +74 -77
- data/test/test_matrix.rb +29 -31
- data/test/test_multiset.rb +132 -126
- data/test/test_regression.rb +143 -149
- data/test/test_reliability.rb +149 -155
- data/test/test_reliability_icc.rb +100 -104
- data/test/test_reliability_skillscale.rb +38 -40
- data/test/test_resample.rb +14 -12
- data/test/test_rserve_extension.rb +33 -33
- data/test/test_srs.rb +5 -5
- data/test/test_statistics.rb +52 -50
- data/test/test_stest.rb +27 -28
- data/test/test_stratified.rb +10 -10
- data/test/test_test_f.rb +17 -17
- data/test/test_test_kolmogorovsmirnov.rb +21 -21
- data/test/test_test_t.rb +52 -52
- data/test/test_umannwhitney.rb +16 -16
- data/test/test_vector.rb +419 -410
- data/test/test_wilcoxonsignedrank.rb +60 -63
- data/test/test_xls.rb +41 -41
- metadata +55 -5
- data/web/Rakefile +0 -39
data/test/test_csv.rb
CHANGED
@@ -1,81 +1,63 @@
|
|
1
|
-
require
|
2
|
-
|
1
|
+
require 'helpers_tests.rb'
|
2
|
+
|
3
|
+
class StatsampleCSVTestCase < Minitest::Test
|
3
4
|
def setup
|
4
|
-
@ds=Statsample::CSV.read(
|
5
|
+
@ds = Statsample::CSV.read('test/fixtures/test_csv.csv')
|
5
6
|
end
|
7
|
+
|
6
8
|
def test_read
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
ds_exp=Statsample::Dataset.new({'id'=>id,'name'=>name,'age'=>age,'city'=>city,'a1'=>a1}, %w{id name age city a1})
|
15
|
-
ds_exp.fields.each{|f|
|
16
|
-
assert_equal(ds_exp[f],@ds[f])
|
9
|
+
header = %w(id name age city a1)
|
10
|
+
data = {
|
11
|
+
'id' => [1, 2, 3, 4, 5, 6].to_vector(:scale),
|
12
|
+
'name' => %w(Alex Claude Peter Franz George Fernand).to_vector(:nominal),
|
13
|
+
'age' => [20, 23, 25, 27, 5.5, nil].to_vector(:scale),
|
14
|
+
'city' => ['New York', 'London', 'London', 'Paris', 'Tome', nil].to_vector(:nominal),
|
15
|
+
'a1' => ['a,b', 'b,c', 'a', nil, 'a,b,c', nil].to_vector(:nominal)
|
17
16
|
}
|
18
|
-
|
19
|
-
|
17
|
+
|
18
|
+
ds_exp = Statsample::Dataset.new(data, header)
|
19
|
+
|
20
|
+
assert_equal(6, @ds.cases)
|
21
|
+
assert_equal(header, @ds.fields)
|
22
|
+
|
23
|
+
ds_exp.fields.each do |f|
|
24
|
+
assert_equal(ds_exp[f], @ds[f])
|
25
|
+
end
|
26
|
+
|
27
|
+
assert_equal(ds_exp, @ds)
|
28
|
+
end
|
29
|
+
|
20
30
|
def test_nil
|
21
|
-
assert_equal(nil
|
31
|
+
assert_equal(nil, @ds['age'][5])
|
22
32
|
end
|
33
|
+
|
23
34
|
def test_repeated
|
24
|
-
ds=Statsample::CSV.read(
|
25
|
-
assert_equal(%w
|
26
|
-
age=[3,4,5,6,nil,8].to_vector(:scale)
|
27
|
-
assert_equal(age,ds['age_2'])
|
28
|
-
end
|
35
|
+
ds = Statsample::CSV.read('test/fixtures/repeated_fields.csv')
|
36
|
+
assert_equal(%w(id name_1 age_1 city a1 name_2 age_2), ds.fields)
|
37
|
+
age = [3, 4, 5, 6, nil, 8].to_vector(:scale)
|
38
|
+
assert_equal(age, ds['age_2'])
|
39
|
+
end
|
40
|
+
|
41
|
+
# Testing fix for SciRuby/statsample#19.
|
42
|
+
def test_accept_scientific_notation_as_float
|
43
|
+
ds = Statsample::CSV.read('test/fixtures/scientific_notation.csv')
|
44
|
+
assert_equal(%w(x y), ds.fields)
|
45
|
+
y = [9.629587310436753e+127, 1.9341543147883677e+129, 3.88485279048245e+130]
|
46
|
+
y.zip(ds['y']).each do |y_expected, y_ds|
|
47
|
+
assert_in_delta(y_expected, y_ds)
|
48
|
+
end
|
49
|
+
|
50
|
+
end
|
51
|
+
|
29
52
|
def test_write
|
30
|
-
filename=Tempfile.new(
|
31
|
-
# filename=Dir::tmpdir+"/test_write.csv"
|
53
|
+
filename = Tempfile.new('afile')
|
32
54
|
Statsample::CSV.write(@ds, filename.path)
|
33
|
-
ds2=Statsample::CSV.read(filename.path)
|
34
|
-
i=0
|
35
|
-
|
36
|
-
|
37
|
-
i
|
38
|
-
|
39
|
-
|
40
|
-
end
|
41
|
-
=begin
|
42
|
-
class StatsampleCSVTestCase2 < MiniTest::Unit::TestCase
|
43
|
-
def setup
|
44
|
-
@ds=Statsample::CSV.read19(File.dirname(__FILE__)+"/fixtures/test_csv.csv")
|
45
|
-
end
|
46
|
-
def test_read
|
47
|
-
assert_equal(6,@ds.cases)
|
48
|
-
assert_equal(%w{id name age city a1}, @ds.fields)
|
49
|
-
id=[1,2,3,4,5,6].to_vector(:scale)
|
50
|
-
name=["Alex","Claude","Peter","Franz","George","Fernand"].to_vector(:nominal)
|
51
|
-
age=[20,23,25,27,5.5,nil].to_vector(:scale)
|
52
|
-
city=["New York","London","London","Paris","Tome",nil].to_vector(:nominal)
|
53
|
-
a1=["a,b","b,c","a",nil,"a,b,c",nil].to_vector(:nominal)
|
54
|
-
ds_exp=Statsample::Dataset.new({'id'=>id,'name'=>name,'age'=>age,'city'=>city,'a1'=>a1}, %w{id name age city a1})
|
55
|
-
ds_exp.fields.each{|f|
|
56
|
-
assert_equal(ds_exp[f],@ds[f])
|
57
|
-
}
|
58
|
-
assert_equal(ds_exp,@ds)
|
59
|
-
end
|
60
|
-
def test_nil
|
61
|
-
assert_equal(nil,@ds['age'][5])
|
62
|
-
end
|
63
|
-
def test_repeated
|
64
|
-
ds=Statsample::CSV.read19(File.dirname(__FILE__)+"/fixtures/repeated_fields.csv")
|
65
|
-
assert_equal(%w{id name_1 age_1 city a1 name_2 age_2},ds.fields)
|
66
|
-
age=[3,4,5,6,nil,8].to_vector(:scale)
|
67
|
-
assert_equal(age,ds['age_2'])
|
68
|
-
end
|
69
|
-
def test_write
|
70
|
-
filename=Tempfile.new("afile")
|
71
|
-
# filename=Dir::tmpdir+"/test_write.csv"
|
72
|
-
Statsample::CSV.write(@ds, filename.path)
|
73
|
-
ds2=Statsample::CSV.read19(filename.path)
|
74
|
-
i=0
|
75
|
-
ds2.each_array{|row|
|
76
|
-
assert_equal(@ds.case_as_array(i),row)
|
77
|
-
i+=1
|
78
|
-
}
|
55
|
+
ds2 = Statsample::CSV.read(filename.path)
|
56
|
+
i = 0
|
57
|
+
|
58
|
+
ds2.each_array do |row|
|
59
|
+
assert_equal(@ds.case_as_array(i), row)
|
60
|
+
i += 1
|
61
|
+
end
|
79
62
|
end
|
80
63
|
end
|
81
|
-
=end
|
data/test/test_dataset.rb
CHANGED
@@ -1,188 +1,199 @@
|
|
1
|
-
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
|
2
|
-
class StatsampleDatasetTestCase <
|
1
|
+
require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
2
|
+
class StatsampleDatasetTestCase < Minitest::Test
|
3
3
|
def setup
|
4
|
-
@ds=Statsample::Dataset.new({'id' => Statsample::Vector.new([1,2,3,4,5]), 'name'=>Statsample::Vector.new(%w
|
5
|
-
|
6
|
-
|
4
|
+
@ds = Statsample::Dataset.new({ 'id' => Statsample::Vector.new([1, 2, 3, 4, 5]), 'name' => Statsample::Vector.new(%w(Alex Claude Peter Franz George)), 'age' => Statsample::Vector.new([20, 23, 25, 27, 5]),
|
5
|
+
'city' => Statsample::Vector.new(['New York', 'London', 'London', 'Paris', 'Tome']),
|
6
|
+
'a1' => Statsample::Vector.new(['a,b', 'b,c', 'a', nil, 'a,b,c']) }, %w(id name age city a1))
|
7
7
|
end
|
8
|
+
|
8
9
|
def test_nest
|
9
|
-
ds={
|
10
|
-
'a'
|
11
|
-
'b'
|
12
|
-
'c'
|
10
|
+
ds = {
|
11
|
+
'a' => %w(a a a b b b).to_vector,
|
12
|
+
'b' => %w(c c d d e e).to_vector,
|
13
|
+
'c' => %w(f g h i j k).to_vector
|
13
14
|
}.to_dataset
|
14
|
-
nest=ds.nest('a','b')
|
15
|
-
assert_equal([{'c'=>'f'},{'c'=>'g'}], nest['a']['c'])
|
16
|
-
assert_equal([{'c'=>'h'}], nest['a']['d'])
|
17
|
-
assert_equal([{'c'=>'j'},{'c'=>'k'}], nest['b']['e'])
|
18
|
-
|
15
|
+
nest = ds.nest('a', 'b')
|
16
|
+
assert_equal([{ 'c' => 'f' }, { 'c' => 'g' }], nest['a']['c'])
|
17
|
+
assert_equal([{ 'c' => 'h' }], nest['a']['d'])
|
18
|
+
assert_equal([{ 'c' => 'j' }, { 'c' => 'k' }], nest['b']['e'])
|
19
19
|
end
|
20
|
+
|
20
21
|
def test_should_have_summary
|
21
|
-
assert(@ds.summary.size>0)
|
22
|
+
assert(@ds.summary.size > 0)
|
22
23
|
end
|
24
|
+
|
23
25
|
def test_basic
|
24
|
-
assert_equal(5
|
25
|
-
assert_equal(%w
|
26
|
+
assert_equal(5, @ds.cases)
|
27
|
+
assert_equal(%w(id name age city a1), @ds.fields)
|
26
28
|
end
|
29
|
+
|
27
30
|
def test_saveload
|
28
|
-
outfile=Tempfile.new(
|
31
|
+
outfile = Tempfile.new('dataset.ds')
|
29
32
|
@ds.save(outfile.path)
|
30
|
-
a=Statsample.load(outfile.path)
|
31
|
-
assert_equal(@ds,a)
|
33
|
+
a = Statsample.load(outfile.path)
|
34
|
+
assert_equal(@ds, a)
|
32
35
|
end
|
36
|
+
|
33
37
|
def test_gsl
|
34
38
|
if Statsample.has_gsl?
|
35
|
-
matrix=GSL::Matrix[[1,2],[3,4],[5,6]]
|
36
|
-
ds=Statsample::Dataset.new('v1'=>[1,3,5].to_vector,'v2'=>[2,4,6].to_vector)
|
37
|
-
assert_equal(matrix,ds.to_gsl)
|
39
|
+
matrix = GSL::Matrix[[1, 2], [3, 4], [5, 6]]
|
40
|
+
ds = Statsample::Dataset.new('v1' => [1, 3, 5].to_vector, 'v2' => [2, 4, 6].to_vector)
|
41
|
+
assert_equal(matrix, ds.to_gsl)
|
38
42
|
else
|
39
|
-
skip(
|
43
|
+
skip('Gsl needed')
|
40
44
|
end
|
41
45
|
end
|
46
|
+
|
42
47
|
def test_matrix
|
43
|
-
matrix=Matrix[[1,2],[3,4],[5,6]]
|
44
|
-
ds=Statsample::Dataset.new('v1'=>[1,3,5].to_vector,'v2'=>[2,4,6].to_vector)
|
45
|
-
assert_equal(matrix,ds.to_matrix)
|
48
|
+
matrix = Matrix[[1, 2], [3, 4], [5, 6]]
|
49
|
+
ds = Statsample::Dataset.new('v1' => [1, 3, 5].to_vector, 'v2' => [2, 4, 6].to_vector)
|
50
|
+
assert_equal(matrix, ds.to_matrix)
|
46
51
|
end
|
47
52
|
|
48
53
|
def test_fields
|
49
|
-
@ds.fields
|
50
|
-
assert_equal(%w
|
51
|
-
@ds.fields
|
52
|
-
assert_equal(%w
|
54
|
+
@ds.fields = %w(name a1 id age city)
|
55
|
+
assert_equal(%w(name a1 id age city), @ds.fields)
|
56
|
+
@ds.fields = %w(id name age)
|
57
|
+
assert_equal(%w(id name age a1 city), @ds.fields)
|
53
58
|
end
|
59
|
+
|
54
60
|
def test_merge
|
55
|
-
a=[1,2,3].to_scale
|
56
|
-
b=[3,4,5].to_vector
|
57
|
-
c=[4,5,6].to_scale
|
58
|
-
d=[7,8,9].to_vector
|
59
|
-
e=[10,20,30].to_vector
|
60
|
-
ds1={'a'=>a,'b'=>b}.to_dataset
|
61
|
-
ds2={'c'=>c,'d'=>d}.to_dataset
|
62
|
-
exp={'a'=>a,'b'=>b,'c'=>c,'d'=>d}.to_dataset
|
63
|
-
|
64
|
-
assert_equal(exp,ds1.merge(ds2))
|
65
|
-
exp.fields
|
66
|
-
assert_equal(exp,ds2.merge(ds1))
|
67
|
-
ds3={'a'=>e}.to_dataset
|
68
|
-
exp={'a_1'=>a,'b'=>b,'a_2'=>e}.to_dataset
|
69
|
-
exp.fields
|
70
|
-
assert_equal(exp,ds1.merge(ds3))
|
61
|
+
a = [1, 2, 3].to_scale
|
62
|
+
b = [3, 4, 5].to_vector
|
63
|
+
c = [4, 5, 6].to_scale
|
64
|
+
d = [7, 8, 9].to_vector
|
65
|
+
e = [10, 20, 30].to_vector
|
66
|
+
ds1 = { 'a' => a, 'b' => b }.to_dataset
|
67
|
+
ds2 = { 'c' => c, 'd' => d }.to_dataset
|
68
|
+
exp = { 'a' => a, 'b' => b, 'c' => c, 'd' => d }.to_dataset
|
69
|
+
|
70
|
+
assert_equal(exp, ds1.merge(ds2))
|
71
|
+
exp.fields = %w(c d a b)
|
72
|
+
assert_equal(exp, ds2.merge(ds1))
|
73
|
+
ds3 = { 'a' => e }.to_dataset
|
74
|
+
exp = { 'a_1' => a, 'b' => b, 'a_2' => e }.to_dataset
|
75
|
+
exp.fields = %w(a_1 b a_2)
|
76
|
+
assert_equal(exp, ds1.merge(ds3))
|
71
77
|
end
|
78
|
+
|
72
79
|
def test_each_vector
|
73
|
-
a=[1,2,3].to_vector
|
74
|
-
b=[3,4,5].to_vector
|
75
|
-
fields=
|
76
|
-
ds=Statsample::Dataset.new({'a'=>a,'b'=>b},fields)
|
77
|
-
res=[]
|
78
|
-
ds.each_vector{|k,v|
|
79
|
-
res.push([k,v])
|
80
|
+
a = [1, 2, 3].to_vector
|
81
|
+
b = [3, 4, 5].to_vector
|
82
|
+
fields = %w(a b)
|
83
|
+
ds = Statsample::Dataset.new({ 'a' => a, 'b' => b }, fields)
|
84
|
+
res = []
|
85
|
+
ds.each_vector{|k, v|
|
86
|
+
res.push([k, v])
|
80
87
|
}
|
81
|
-
assert_equal([[
|
82
|
-
ds.fields=
|
83
|
-
res=[]
|
84
|
-
ds.each_vector{|k,v|
|
85
|
-
res.push([k,v])
|
88
|
+
assert_equal([['a', a], ['b', b]], res)
|
89
|
+
ds.fields = %w(b a)
|
90
|
+
res = []
|
91
|
+
ds.each_vector{|k, v|
|
92
|
+
res.push([k, v])
|
86
93
|
}
|
87
|
-
assert_equal([[
|
94
|
+
assert_equal([['b', b], ['a', a]], res)
|
88
95
|
end
|
96
|
+
|
89
97
|
def test_equality
|
90
|
-
v1=[1,2,3,4].to_vector
|
91
|
-
v2=[5,6,7,8].to_vector
|
92
|
-
ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2}, %w
|
93
|
-
v3=[1,2,3,4].to_vector
|
94
|
-
v4=[5,6,7,8].to_vector
|
95
|
-
ds2=Statsample::Dataset.new({'v1'=>v3,'v2'=>v4}, %w
|
96
|
-
assert_equal(ds1,ds2)
|
97
|
-
ds2.fields
|
98
|
-
assert_not_equal(ds1,ds2)
|
98
|
+
v1 = [1, 2, 3, 4].to_vector
|
99
|
+
v2 = [5, 6, 7, 8].to_vector
|
100
|
+
ds1 = Statsample::Dataset.new({ 'v1' => v1, 'v2' => v2 }, %w(v2 v1))
|
101
|
+
v3 = [1, 2, 3, 4].to_vector
|
102
|
+
v4 = [5, 6, 7, 8].to_vector
|
103
|
+
ds2 = Statsample::Dataset.new({ 'v1' => v3, 'v2' => v4 }, %w(v2 v1))
|
104
|
+
assert_equal(ds1, ds2)
|
105
|
+
ds2.fields = %w(v1 v2)
|
106
|
+
assert_not_equal(ds1, ds2)
|
99
107
|
end
|
108
|
+
|
100
109
|
def test_add_vector
|
101
|
-
v=Statsample::Vector.new(%w
|
102
|
-
@ds.add_vector('new',v)
|
103
|
-
assert_equal(%w
|
104
|
-
x=Statsample::Vector.new(%w
|
110
|
+
v = Statsample::Vector.new(%w(a b c d e))
|
111
|
+
@ds.add_vector('new', v)
|
112
|
+
assert_equal(%w(id name age city a1 new), @ds.fields)
|
113
|
+
x = Statsample::Vector.new(%w(a b c d e f g))
|
105
114
|
assert_raise ArgumentError do
|
106
|
-
@ds.add_vector('new2',x)
|
115
|
+
@ds.add_vector('new2', x)
|
107
116
|
end
|
108
117
|
end
|
118
|
+
|
109
119
|
def test_vector_by_calculation
|
110
|
-
a1=[1,2,3,4,5,6,7].to_vector(:scale)
|
111
|
-
a2=[10,20,30,40,50,60,70].to_vector(:scale)
|
112
|
-
a3=[100,200,300,400,500,600,700].to_vector(:scale)
|
113
|
-
ds={'a1'=>a1,'a2'=>a2,'a3'=>a3}.to_dataset
|
114
|
-
total=ds.vector_by_calculation
|
115
|
-
row['a1']+row['a2']+row['a3']
|
120
|
+
a1 = [1, 2, 3, 4, 5, 6, 7].to_vector(:scale)
|
121
|
+
a2 = [10, 20, 30, 40, 50, 60, 70].to_vector(:scale)
|
122
|
+
a3 = [100, 200, 300, 400, 500, 600, 700].to_vector(:scale)
|
123
|
+
ds = { 'a1' => a1, 'a2' => a2, 'a3' => a3 }.to_dataset
|
124
|
+
total = ds.vector_by_calculation {|row|
|
125
|
+
row['a1'] + row['a2'] + row['a3']
|
116
126
|
}
|
117
|
-
expected=[111,222,333,444,555,666,777].to_vector(:scale)
|
118
|
-
assert_equal(expected,total)
|
127
|
+
expected = [111, 222, 333, 444, 555, 666, 777].to_vector(:scale)
|
128
|
+
assert_equal(expected, total)
|
119
129
|
end
|
130
|
+
|
120
131
|
def test_vector_sum
|
121
|
-
a1=[1
|
122
|
-
a2=[10
|
123
|
-
b1=[nil,1 ,1
|
124
|
-
b2=[2
|
125
|
-
ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2}.to_dataset
|
126
|
-
total=ds.vector_sum
|
127
|
-
a=ds.vector_sum(
|
128
|
-
b=ds.vector_sum(
|
129
|
-
expected_a=[11,12,23,24,25,nil].to_vector(:scale)
|
130
|
-
expected_b=[nil,3,3,nil,3,5].to_vector(:scale)
|
131
|
-
expected_total=[nil,15,26,nil,28,nil].to_vector(:scale)
|
132
|
+
a1 = [1, 2, 3, 4, 5, nil].to_vector(:scale)
|
133
|
+
a2 = [10, 10, 20, 20, 20, 30].to_vector(:scale)
|
134
|
+
b1 = [nil, 1, 1, 1, 1, 2].to_vector(:scale)
|
135
|
+
b2 = [2, 2, 2, nil, 2, 3].to_vector(:scale)
|
136
|
+
ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2 }.to_dataset
|
137
|
+
total = ds.vector_sum
|
138
|
+
a = ds.vector_sum(%w(a1 a2))
|
139
|
+
b = ds.vector_sum(%w(b1 b2))
|
140
|
+
expected_a = [11, 12, 23, 24, 25, nil].to_vector(:scale)
|
141
|
+
expected_b = [nil, 3, 3, nil, 3, 5].to_vector(:scale)
|
142
|
+
expected_total = [nil, 15, 26, nil, 28, nil].to_vector(:scale)
|
132
143
|
assert_equal(expected_a, a)
|
133
144
|
assert_equal(expected_b, b)
|
134
145
|
assert_equal(expected_total, total)
|
135
146
|
end
|
147
|
+
|
136
148
|
def test_vector_missing_values
|
137
|
-
a1=[1
|
138
|
-
a2=[10 ,
|
139
|
-
b1=[nil,nil ,1
|
140
|
-
b2=[2
|
141
|
-
c= [nil,2
|
142
|
-
ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
|
143
|
-
mva=[2,3,0,1,0,1].to_vector(:scale)
|
144
|
-
assert_equal(mva,ds.vector_missing_values)
|
145
|
-
end
|
146
|
-
|
149
|
+
a1 = [1, nil, 3, 4, 5, nil].to_vector(:scale)
|
150
|
+
a2 = [10, nil, 20, 20, 20, 30].to_vector(:scale)
|
151
|
+
b1 = [nil, nil, 1, 1, 1, 2].to_vector(:scale)
|
152
|
+
b2 = [2, 2, 2, nil, 2, 3].to_vector(:scale)
|
153
|
+
c = [nil, 2, 4, 2, 2, 2].to_vector(:scale)
|
154
|
+
ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
|
155
|
+
mva = [2, 3, 0, 1, 0, 1].to_vector(:scale)
|
156
|
+
assert_equal(mva, ds.vector_missing_values)
|
157
|
+
end
|
158
|
+
|
147
159
|
def test_has_missing_values
|
148
|
-
a1=[1
|
149
|
-
a2=[10 ,
|
150
|
-
b1=[nil,nil ,1
|
151
|
-
b2=[2
|
152
|
-
c= [nil,2
|
153
|
-
ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
|
160
|
+
a1 = [1, nil, 3, 4, 5, nil].to_vector(:scale)
|
161
|
+
a2 = [10, nil, 20, 20, 20, 30].to_vector(:scale)
|
162
|
+
b1 = [nil, nil, 1, 1, 1, 2].to_vector(:scale)
|
163
|
+
b2 = [2, 2, 2, nil, 2, 3].to_vector(:scale)
|
164
|
+
c = [nil, 2, 4, 2, 2, 2].to_vector(:scale)
|
165
|
+
ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
|
154
166
|
assert(ds.has_missing_data?)
|
155
|
-
clean=ds.dup_only_valid
|
167
|
+
clean = ds.dup_only_valid
|
156
168
|
assert(!clean.has_missing_data?)
|
157
169
|
end
|
158
|
-
|
159
|
-
|
160
|
-
def test_vector_count_characters
|
161
|
-
a1=[1 ,"abcde" ,3 ,4 , 5,nil].to_vector(:scale)
|
162
|
-
a2=[10 ,20.3 ,20 ,20 ,20,30].to_vector(:scale)
|
163
|
-
b1=[nil,"343434" ,1 ,1 ,1 ,2].to_vector(:scale)
|
164
|
-
b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
|
165
|
-
c= [nil,2 ,"This is a nice example",2 ,2 ,2].to_vector(:scale)
|
166
|
-
ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
|
167
|
-
exp=[4,17,27,5,6,5].to_vector(:scale)
|
168
|
-
assert_equal(exp,ds.vector_count_characters)
|
169
170
|
|
171
|
+
def test_vector_count_characters
|
172
|
+
a1 = [1, 'abcde', 3, 4, 5, nil].to_vector(:scale)
|
173
|
+
a2 = [10, 20.3, 20, 20, 20, 30].to_vector(:scale)
|
174
|
+
b1 = [nil, '343434', 1, 1, 1, 2].to_vector(:scale)
|
175
|
+
b2 = [2, 2, 2, nil, 2, 3].to_vector(:scale)
|
176
|
+
c = [nil, 2, 'This is a nice example', 2, 2, 2].to_vector(:scale)
|
177
|
+
ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
|
178
|
+
exp = [4, 17, 27, 5, 6, 5].to_vector(:scale)
|
179
|
+
assert_equal(exp, ds.vector_count_characters)
|
170
180
|
end
|
181
|
+
|
171
182
|
def test_vector_mean
|
172
|
-
a1=[1
|
173
|
-
a2=[10
|
174
|
-
b1=[nil,1 ,1
|
175
|
-
b2=[2
|
176
|
-
c= [nil,2, 4,2
|
177
|
-
ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
|
178
|
-
total=ds.vector_mean
|
179
|
-
a=ds.vector_mean(
|
180
|
-
b=ds.vector_mean(
|
181
|
-
c=ds.vector_mean(
|
182
|
-
expected_a=[5.5,6,11.5,12,12.5,30].to_vector(:scale)
|
183
|
-
expected_b=[2,1.5,1.5,1,1.5,2.5].to_vector(:scale)
|
184
|
-
expected_c=[nil, 5.0/3,7.0/3,1.5,5.0/3,7.0/3].to_vector(:scale)
|
185
|
-
expected_total=[nil,3.4,6,nil,6.0,nil].to_vector(:scale)
|
183
|
+
a1 = [1, 2, 3, 4, 5, nil].to_vector(:scale)
|
184
|
+
a2 = [10, 10, 20, 20, 20, 30].to_vector(:scale)
|
185
|
+
b1 = [nil, 1, 1, 1, 1, 2].to_vector(:scale)
|
186
|
+
b2 = [2, 2, 2, nil, 2, 3].to_vector(:scale)
|
187
|
+
c = [nil, 2, 4, 2, 2, 2].to_vector(:scale)
|
188
|
+
ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
|
189
|
+
total = ds.vector_mean
|
190
|
+
a = ds.vector_mean(%w(a1 a2), 1)
|
191
|
+
b = ds.vector_mean(%w(b1 b2), 1)
|
192
|
+
c = ds.vector_mean(%w(b1 b2 c), 1)
|
193
|
+
expected_a = [5.5, 6, 11.5, 12, 12.5, 30].to_vector(:scale)
|
194
|
+
expected_b = [2, 1.5, 1.5, 1, 1.5, 2.5].to_vector(:scale)
|
195
|
+
expected_c = [nil, 5.0 / 3, 7.0 / 3, 1.5, 5.0 / 3, 7.0 / 3].to_vector(:scale)
|
196
|
+
expected_total = [nil, 3.4, 6, nil, 6.0, nil].to_vector(:scale)
|
186
197
|
assert_equal(expected_a, a)
|
187
198
|
assert_equal(expected_b, b)
|
188
199
|
assert_equal(expected_c, c)
|
@@ -190,273 +201,279 @@ class StatsampleDatasetTestCase < MiniTest::Unit::TestCase
|
|
190
201
|
end
|
191
202
|
|
192
203
|
def test_each_array
|
193
|
-
expected=[[1,'Alex',20,'New York','a,b'], [2,'Claude',23,'London','b,c'], [3,'Peter',25,'London','a'],[4,'Franz', 27,'Paris',nil],[5,'George',5,'Tome','a,b,c']]
|
194
|
-
out=[]
|
204
|
+
expected = [[1, 'Alex', 20, 'New York', 'a,b'], [2, 'Claude', 23, 'London', 'b,c'], [3, 'Peter', 25, 'London', 'a'], [4, 'Franz', 27, 'Paris', nil], [5, 'George', 5, 'Tome', 'a,b,c']]
|
205
|
+
out = []
|
195
206
|
@ds.each_array{ |a|
|
196
207
|
out.push(a)
|
197
208
|
}
|
198
|
-
assert_equal(expected,out)
|
209
|
+
assert_equal(expected, out)
|
199
210
|
end
|
211
|
+
|
200
212
|
def test_recode
|
201
|
-
@ds['age'].type
|
202
|
-
@ds.recode!(
|
203
|
-
expected=[2,4,6,8,10].to_vector(:scale)
|
204
|
-
assert_equal(expected
|
213
|
+
@ds['age'].type = :scale
|
214
|
+
@ds.recode!('age') { |c| c['id'] * 2 }
|
215
|
+
expected = [2, 4, 6, 8, 10].to_vector(:scale)
|
216
|
+
assert_equal(expected, @ds['age'])
|
205
217
|
end
|
218
|
+
|
206
219
|
def test_case_as
|
207
|
-
assert_equal({'id'=>1,'name'=>'Alex','city'=>'New York','age'=>20,'a1'=>'a,b'}
|
208
|
-
assert_equal([5,'George',5,'Tome','a,b,c']
|
220
|
+
assert_equal({ 'id' => 1, 'name' => 'Alex', 'city' => 'New York', 'age' => 20, 'a1' => 'a,b' }, @ds.case_as_hash(0))
|
221
|
+
assert_equal([5, 'George', 5, 'Tome', 'a,b,c'], @ds.case_as_array(4))
|
209
222
|
# Native methods
|
210
|
-
assert_equal({'id'=>1,'name'=>'Alex','city'=>'New York','age'=>20,'a1'=>'a,b'}
|
211
|
-
assert_equal([5,'George',5,'Tome','a,b,c']
|
212
|
-
|
213
|
-
|
214
|
-
|
223
|
+
assert_equal({ 'id' => 1, 'name' => 'Alex', 'city' => 'New York', 'age' => 20, 'a1' => 'a,b' }, @ds._case_as_hash(0))
|
224
|
+
assert_equal([5, 'George', 5, 'Tome', 'a,b,c'], @ds._case_as_array(4))
|
215
225
|
end
|
226
|
+
|
216
227
|
def test_delete_vector
|
217
228
|
@ds.delete_vector('name')
|
218
|
-
assert_equal(%w
|
219
|
-
assert_equal(%w
|
229
|
+
assert_equal(%w(id age city a1), @ds.fields)
|
230
|
+
assert_equal(%w(a1 age city id), @ds.vectors.keys.sort)
|
220
231
|
end
|
232
|
+
|
221
233
|
def test_change_type
|
222
|
-
@ds.col('age').type
|
223
|
-
assert_equal(:scale
|
234
|
+
@ds.col('age').type = :scale
|
235
|
+
assert_equal(:scale, @ds.col('age').type)
|
224
236
|
end
|
237
|
+
|
225
238
|
def test_split_by_separator_recode
|
226
|
-
@ds.add_vectors_by_split_recode(
|
227
|
-
assert_equal(%w
|
228
|
-
assert_equal([1,0,1,nil,1]
|
229
|
-
assert_equal([1,1,0,nil,1]
|
230
|
-
assert_equal([0,1,0,nil,1]
|
231
|
-
{'a1_1'=>'a1:a', 'a1_2'=>'a1:b', 'a1_3'=>'a1:c'}.each do |k,v|
|
239
|
+
@ds.add_vectors_by_split_recode('a1', '_')
|
240
|
+
assert_equal(%w(id name age city a1 a1_1 a1_2 a1_3), @ds.fields)
|
241
|
+
assert_equal([1, 0, 1, nil, 1], @ds.col('a1_1').to_a)
|
242
|
+
assert_equal([1, 1, 0, nil, 1], @ds.col('a1_2').to_a)
|
243
|
+
assert_equal([0, 1, 0, nil, 1], @ds.col('a1_3').to_a)
|
244
|
+
{ 'a1_1' => 'a1:a', 'a1_2' => 'a1:b', 'a1_3' => 'a1:c' }.each do |k, v|
|
232
245
|
assert_equal(v, @ds[k].name)
|
233
246
|
end
|
234
247
|
end
|
248
|
+
|
235
249
|
def test_split_by_separator
|
236
|
-
@ds.add_vectors_by_split(
|
237
|
-
assert_equal(%w
|
238
|
-
assert_equal([1,0,1,nil,1]
|
239
|
-
assert_equal([1,1,0,nil,1]
|
240
|
-
assert_equal([0,1,0,nil,1]
|
250
|
+
@ds.add_vectors_by_split('a1', '_')
|
251
|
+
assert_equal(%w(id name age city a1 a1_a a1_b a1_c), @ds.fields)
|
252
|
+
assert_equal([1, 0, 1, nil, 1], @ds.col('a1_a').to_a)
|
253
|
+
assert_equal([1, 1, 0, nil, 1], @ds.col('a1_b').to_a)
|
254
|
+
assert_equal([0, 1, 0, nil, 1], @ds.col('a1_c').to_a)
|
241
255
|
end
|
256
|
+
|
242
257
|
def test_percentiles
|
243
|
-
v1=(1..100).to_a.to_scale
|
244
|
-
assert_equal(50.5,v1.median)
|
258
|
+
v1 = (1..100).to_a.to_scale
|
259
|
+
assert_equal(50.5, v1.median)
|
245
260
|
assert_equal(25.5, v1.percentil(25))
|
246
|
-
v2=(1..99).to_a.to_scale
|
247
|
-
assert_equal(50,v2.median)
|
248
|
-
assert_equal(25,v2.percentil(25))
|
249
|
-
v3=(1..50).to_a.to_scale
|
261
|
+
v2 = (1..99).to_a.to_scale
|
262
|
+
assert_equal(50, v2.median)
|
263
|
+
assert_equal(25, v2.percentil(25))
|
264
|
+
v3 = (1..50).to_a.to_scale
|
250
265
|
assert_equal(25.5, v3.median)
|
251
266
|
assert_equal(13, v3.percentil(25))
|
252
|
-
|
253
267
|
end
|
268
|
+
|
254
269
|
def test_add_case
|
255
|
-
ds=Statsample::Dataset.new(
|
256
|
-
ds.add_case([1,2,3])
|
257
|
-
ds.add_case(
|
258
|
-
ds.add_case([[7,8,9]
|
259
|
-
assert_equal({'a'=>1,'b'=>2,'c'=>3},ds.case_as_hash(0))
|
260
|
-
assert_equal([4,5,6],ds.case_as_array(1))
|
261
|
-
assert_equal([7,8,9],ds.case_as_array(2))
|
262
|
-
assert_equal(
|
263
|
-
ds.add_case_array([6,7,1])
|
270
|
+
ds = Statsample::Dataset.new('a' => [].to_vector, 'b' => [].to_vector, 'c' => [].to_vector)
|
271
|
+
ds.add_case([1, 2, 3])
|
272
|
+
ds.add_case('a' => 4, 'b' => 5, 'c' => 6)
|
273
|
+
ds.add_case([[7, 8, 9], %w(a b c)])
|
274
|
+
assert_equal({ 'a' => 1, 'b' => 2, 'c' => 3 }, ds.case_as_hash(0))
|
275
|
+
assert_equal([4, 5, 6], ds.case_as_array(1))
|
276
|
+
assert_equal([7, 8, 9], ds.case_as_array(2))
|
277
|
+
assert_equal(%w(a b c), ds.case_as_array(3))
|
278
|
+
ds.add_case_array([6, 7, 1])
|
264
279
|
ds.update_valid_data
|
265
|
-
assert_equal([6,7,1],ds.case_as_array(4))
|
266
|
-
|
280
|
+
assert_equal([6, 7, 1], ds.case_as_array(4))
|
267
281
|
end
|
282
|
+
|
268
283
|
def test_marshaling
|
269
|
-
ds_marshal=Marshal.load(Marshal.dump(@ds))
|
270
|
-
assert_equal(ds_marshal
|
284
|
+
ds_marshal = Marshal.load(Marshal.dump(@ds))
|
285
|
+
assert_equal(ds_marshal, @ds)
|
271
286
|
end
|
272
|
-
def test_range
|
273
|
-
v1=[1,2,3,4].to_vector
|
274
|
-
v2=[5,6,7,8].to_vector
|
275
|
-
v3=[9,10,11,12].to_vector
|
276
|
-
ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2,'v3'=>v3}, %w{v3 v2 v1})
|
277
|
-
assert_same(v1,ds1['v1'])
|
278
|
-
ds2=ds1["v2".."v1"]
|
279
|
-
assert_equal(%w{v2 v1},ds2.fields)
|
280
|
-
assert_same(ds1['v1'],ds2['v1'])
|
281
|
-
assert_same(ds1['v2'],ds2['v2'])
|
282
|
-
|
283
287
|
|
288
|
+
def test_range
|
289
|
+
v1 = [1, 2, 3, 4].to_vector
|
290
|
+
v2 = [5, 6, 7, 8].to_vector
|
291
|
+
v3 = [9, 10, 11, 12].to_vector
|
292
|
+
ds1 = Statsample::Dataset.new({ 'v1' => v1, 'v2' => v2, 'v3' => v3 }, %w(v3 v2 v1))
|
293
|
+
assert_same(v1, ds1['v1'])
|
294
|
+
ds2 = ds1['v2'..'v1']
|
295
|
+
assert_equal(%w(v2 v1), ds2.fields)
|
296
|
+
assert_same(ds1['v1'], ds2['v1'])
|
297
|
+
assert_same(ds1['v2'], ds2['v2'])
|
284
298
|
end
|
299
|
+
|
285
300
|
def test_clone
|
286
|
-
v1=[1,2,3,4].to_vector
|
287
|
-
v2=[5,6,7,8].to_vector
|
288
|
-
ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2}, %w
|
289
|
-
ds2=ds1.clone
|
290
|
-
assert_equal(ds1,ds2)
|
291
|
-
assert_not_same(ds1,ds2)
|
292
|
-
assert_equal(ds1['v1'],ds2['v1'])
|
301
|
+
v1 = [1, 2, 3, 4].to_vector
|
302
|
+
v2 = [5, 6, 7, 8].to_vector
|
303
|
+
ds1 = Statsample::Dataset.new({ 'v1' => v1, 'v2' => v2 }, %w(v2 v1))
|
304
|
+
ds2 = ds1.clone
|
305
|
+
assert_equal(ds1, ds2)
|
306
|
+
assert_not_same(ds1, ds2)
|
307
|
+
assert_equal(ds1['v1'], ds2['v1'])
|
293
308
|
assert_same(ds1['v1'], ds2['v1'])
|
294
|
-
assert_equal(ds1.fields,ds2.fields)
|
295
|
-
assert_not_same(ds1.fields,ds2.fields)
|
296
|
-
assert_equal(ds1.cases,ds2.cases)
|
309
|
+
assert_equal(ds1.fields, ds2.fields)
|
310
|
+
assert_not_same(ds1.fields, ds2.fields)
|
311
|
+
assert_equal(ds1.cases, ds2.cases)
|
297
312
|
|
298
313
|
# partial clone
|
299
|
-
ds3=ds1.clone('v1')
|
300
|
-
ds_exp=Statsample::Dataset.new({'v1'=>v1}
|
301
|
-
assert_equal(ds_exp,ds3)
|
302
|
-
assert_not_same(ds_exp,ds3)
|
303
|
-
assert_equal(ds3['v1'],ds_exp['v1'])
|
304
|
-
assert_same(ds3['v1'],ds_exp['v1'])
|
305
|
-
assert_equal(ds3.fields,ds_exp.fields)
|
306
|
-
assert_equal(ds3.cases,ds_exp.cases)
|
307
|
-
|
308
|
-
assert_not_same(ds3.fields,ds_exp.fields)
|
309
|
-
|
314
|
+
ds3 = ds1.clone('v1')
|
315
|
+
ds_exp = Statsample::Dataset.new({ 'v1' => v1 }, %w(v1))
|
316
|
+
assert_equal(ds_exp, ds3)
|
317
|
+
assert_not_same(ds_exp, ds3)
|
318
|
+
assert_equal(ds3['v1'], ds_exp['v1'])
|
319
|
+
assert_same(ds3['v1'], ds_exp['v1'])
|
320
|
+
assert_equal(ds3.fields, ds_exp.fields)
|
321
|
+
assert_equal(ds3.cases, ds_exp.cases)
|
322
|
+
|
323
|
+
assert_not_same(ds3.fields, ds_exp.fields)
|
310
324
|
end
|
325
|
+
|
311
326
|
def test_dup
|
312
|
-
v1=[1,2,3,4].to_vector
|
313
|
-
v2=[5,6,7,8].to_vector
|
314
|
-
ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2}, %w
|
315
|
-
ds2=ds1.dup
|
316
|
-
assert_equal(ds1,ds2)
|
317
|
-
assert_not_same(ds1,ds2)
|
318
|
-
assert_equal(ds1['v1'],ds2['v1'])
|
319
|
-
assert_not_same(ds1['v1'],ds2['v1'])
|
320
|
-
assert_equal(ds1.cases,ds2.cases)
|
321
|
-
|
322
|
-
assert_equal(ds1.fields,ds2.fields)
|
323
|
-
assert_not_same(ds1.fields,ds2.fields)
|
324
|
-
ds1['v1'].type=:scale
|
325
|
-
# dup partial
|
326
|
-
ds3=ds1.dup('v1')
|
327
|
-
ds_exp=Statsample::Dataset.new({'v1'=>v1},%w{v1})
|
328
|
-
assert_equal(ds_exp,ds3)
|
329
|
-
assert_not_same(ds_exp,ds3)
|
330
|
-
assert_equal(ds3['v1'],ds_exp['v1'])
|
331
|
-
assert_not_same(ds3['v1'],ds_exp['v1'])
|
332
|
-
assert_equal(ds3.fields,ds_exp.fields)
|
333
|
-
assert_equal(ds3.cases,ds_exp.cases)
|
327
|
+
v1 = [1, 2, 3, 4].to_vector
|
328
|
+
v2 = [5, 6, 7, 8].to_vector
|
329
|
+
ds1 = Statsample::Dataset.new({ 'v1' => v1, 'v2' => v2 }, %w(v2 v1))
|
330
|
+
ds2 = ds1.dup
|
331
|
+
assert_equal(ds1, ds2)
|
332
|
+
assert_not_same(ds1, ds2)
|
333
|
+
assert_equal(ds1['v1'], ds2['v1'])
|
334
|
+
assert_not_same(ds1['v1'], ds2['v1'])
|
335
|
+
assert_equal(ds1.cases, ds2.cases)
|
334
336
|
|
335
|
-
|
337
|
+
assert_equal(ds1.fields, ds2.fields)
|
338
|
+
assert_not_same(ds1.fields, ds2.fields)
|
339
|
+
ds1['v1'].type = :scale
|
340
|
+
# dup partial
|
341
|
+
ds3 = ds1.dup('v1')
|
342
|
+
ds_exp = Statsample::Dataset.new({ 'v1' => v1 }, %w(v1))
|
343
|
+
assert_equal(ds_exp, ds3)
|
344
|
+
assert_not_same(ds_exp, ds3)
|
345
|
+
assert_equal(ds3['v1'], ds_exp['v1'])
|
346
|
+
assert_not_same(ds3['v1'], ds_exp['v1'])
|
347
|
+
assert_equal(ds3.fields, ds_exp.fields)
|
348
|
+
assert_equal(ds3.cases, ds_exp.cases)
|
336
349
|
|
350
|
+
assert_not_same(ds3.fields, ds_exp.fields)
|
337
351
|
|
338
352
|
# empty
|
339
|
-
ds3=ds1.dup_empty
|
340
|
-
assert_not_equal(ds1,ds3)
|
341
|
-
assert_not_equal(ds1['v1'],ds3['v1'])
|
342
|
-
assert_equal([],ds3['v1'].data)
|
343
|
-
assert_equal([],ds3['v2'].data)
|
344
|
-
assert_equal(:scale,ds3['v1'].type)
|
345
|
-
assert_equal(ds1.fields,ds2.fields)
|
346
|
-
assert_not_same(ds1.fields,ds2.fields)
|
353
|
+
ds3 = ds1.dup_empty
|
354
|
+
assert_not_equal(ds1, ds3)
|
355
|
+
assert_not_equal(ds1['v1'], ds3['v1'])
|
356
|
+
assert_equal([], ds3['v1'].data)
|
357
|
+
assert_equal([], ds3['v2'].data)
|
358
|
+
assert_equal(:scale, ds3['v1'].type)
|
359
|
+
assert_equal(ds1.fields, ds2.fields)
|
360
|
+
assert_not_same(ds1.fields, ds2.fields)
|
347
361
|
end
|
362
|
+
|
348
363
|
def test_from_to
|
349
|
-
assert_equal(%w
|
364
|
+
assert_equal(%w(name age city), @ds.from_to('name', 'city'))
|
350
365
|
assert_raise ArgumentError do
|
351
|
-
@ds.from_to(
|
366
|
+
@ds.from_to('name', 'a2')
|
352
367
|
end
|
353
368
|
end
|
369
|
+
|
354
370
|
def test_each_array_with_nils
|
355
|
-
v1=[1
|
356
|
-
v2=[5,6
|
357
|
-
v3=[9,10,11,12,20].to_vector(:scale
|
358
|
-
ds1=Statsample::Dataset.new(
|
359
|
-
ds2=ds1.dup_empty
|
371
|
+
v1 = [1, -99, 3, 4, 'na'].to_vector(:scale, missing_values: [-99, 'na'])
|
372
|
+
v2 = [5, 6, -99, 8, 20].to_vector(:scale, missing_values: [-99])
|
373
|
+
v3 = [9, 10, 11, 12, 20].to_vector(:scale, missing_values: [-99])
|
374
|
+
ds1 = Statsample::Dataset.new('v1' => v1, 'v2' => v2, 'v3' => v3)
|
375
|
+
ds2 = ds1.dup_empty
|
360
376
|
ds1.each_array_with_nils {|row|
|
361
377
|
ds2.add_case_array(row)
|
362
378
|
}
|
363
379
|
ds2.update_valid_data
|
364
|
-
assert_equal([1,nil,3,4,nil],ds2['v1'].data)
|
365
|
-
assert_equal([5,6,nil,8,20],ds2['v2'].data)
|
380
|
+
assert_equal([1, nil, 3, 4, nil], ds2['v1'].data)
|
381
|
+
assert_equal([5, 6, nil, 8, 20], ds2['v2'].data)
|
366
382
|
end
|
383
|
+
|
367
384
|
def test_dup_only_valid
|
368
|
-
v1=[1,nil,3,4].to_vector(:scale)
|
369
|
-
v2=[5,6,nil,8].to_vector(:scale)
|
370
|
-
v3=[9,10,11,12].to_vector(:scale)
|
371
|
-
ds1=Statsample::Dataset.new(
|
372
|
-
ds2=ds1.dup_only_valid
|
373
|
-
expected=Statsample::Dataset.new(
|
374
|
-
assert_equal(expected,ds2)
|
375
|
-
assert_equal(expected.vectors.values,Statsample
|
376
|
-
expected_partial=Statsample::Dataset.new(
|
377
|
-
assert_equal(expected_partial, ds1.dup_only_valid(%w
|
378
|
-
|
379
|
-
|
385
|
+
v1 = [1, nil, 3, 4].to_vector(:scale)
|
386
|
+
v2 = [5, 6, nil, 8].to_vector(:scale)
|
387
|
+
v3 = [9, 10, 11, 12].to_vector(:scale)
|
388
|
+
ds1 = Statsample::Dataset.new('v1' => v1, 'v2' => v2, 'v3' => v3)
|
389
|
+
ds2 = ds1.dup_only_valid
|
390
|
+
expected = Statsample::Dataset.new('v1' => [1, 4].to_vector(:scale), 'v2' => [5, 8].to_vector(:scale), 'v3' => [9, 12].to_vector(:scale))
|
391
|
+
assert_equal(expected, ds2)
|
392
|
+
assert_equal(expected.vectors.values, Statsample.only_valid(v1, v2, v3))
|
393
|
+
expected_partial = Statsample::Dataset.new('v1' => [1, 3, 4].to_vector(:scale), 'v3' => [9, 11, 12].to_vector(:scale))
|
394
|
+
assert_equal(expected_partial, ds1.dup_only_valid(%w(v1 v3)))
|
380
395
|
end
|
396
|
+
|
381
397
|
def test_filter
|
382
|
-
@ds['age'].type
|
383
|
-
filtered
|
384
|
-
expected=Statsample::Dataset.new({'id' => Statsample::Vector.new([2,4]), 'name'=>Statsample::Vector.new(%w
|
385
|
-
|
386
|
-
|
387
|
-
assert_equal(expected,filtered)
|
398
|
+
@ds['age'].type = :scale
|
399
|
+
filtered = @ds.filter { |c| c['id'] == 2 or c['id'] == 4 }
|
400
|
+
expected = Statsample::Dataset.new({ 'id' => Statsample::Vector.new([2, 4]), 'name' => Statsample::Vector.new(%w(Claude Franz)), 'age' => Statsample::Vector.new([23, 27], :scale),
|
401
|
+
'city' => Statsample::Vector.new(%w(London Paris)),
|
402
|
+
'a1' => Statsample::Vector.new(['b,c', nil]) }, %w(id name age city a1))
|
403
|
+
assert_equal(expected, filtered)
|
388
404
|
end
|
389
|
-
def test_filter_field
|
390
|
-
@ds['age'].type=:scale
|
391
|
-
filtered=@ds.filter_field('id') {|c| c['id']==2 or c['id']==4}
|
392
|
-
expected=[2,4].to_vector
|
393
|
-
assert_equal(expected,filtered)
|
394
405
|
|
406
|
+
def test_filter_field
|
407
|
+
@ds['age'].type = :scale
|
408
|
+
filtered = @ds.filter_field('id') { |c| c['id'] == 2 or c['id'] == 4 }
|
409
|
+
expected = [2, 4].to_vector
|
410
|
+
assert_equal(expected, filtered)
|
395
411
|
end
|
412
|
+
|
396
413
|
def test_verify
|
397
|
-
name
|
398
|
-
v1=[1,2,3,4].to_vector(:scale)
|
399
|
-
v2=[4,3,2,1].to_vector(:scale)
|
400
|
-
v3=[10,20,30,40].to_vector(:scale)
|
401
|
-
v4
|
402
|
-
ds={'v1'=>v1,'v2'=>v2,'v3'=>v3,'v4'=>v4,'id'=>name}.to_dataset
|
403
|
-
ds.fields
|
404
|
-
#Correct
|
405
|
-
t1=create_test(
|
406
|
-
t2=create_test(
|
414
|
+
name = %w(r1 r2 r3 r4).to_vector(:nominal)
|
415
|
+
v1 = [1, 2, 3, 4].to_vector(:scale)
|
416
|
+
v2 = [4, 3, 2, 1].to_vector(:scale)
|
417
|
+
v3 = [10, 20, 30, 40].to_vector(:scale)
|
418
|
+
v4 = %w(a b a b).to_vector(:nominal)
|
419
|
+
ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4, 'id' => name }.to_dataset
|
420
|
+
ds.fields = %w(v1 v2 v3 v4 id)
|
421
|
+
# Correct
|
422
|
+
t1 = create_test('If v4=a, v1 odd') { |r| r['v4'] == 'b' or (r['v4'] == 'a' and r['v1'].odd?) }
|
423
|
+
t2 = create_test('v3=v1*10') { |r| r['v3'] == r['v1'] * 10 }
|
407
424
|
# Fail!
|
408
|
-
t3=create_test("v4='b'") {|r| r['v4']=='b'}
|
409
|
-
exp1=["1 [1]: v4='b'", "3 [3]: v4='b'"]
|
410
|
-
exp2=["1 [r1]: v4='b'", "3 [r3]: v4='b'"]
|
411
|
-
res=ds.verify(t3,t1,t2)
|
412
|
-
assert_equal(exp1,res)
|
413
|
-
res=ds.verify('id',t1,t2,t3)
|
414
|
-
assert_equal(exp2,res)
|
425
|
+
t3 = create_test("v4='b'") { |r| r['v4'] == 'b' }
|
426
|
+
exp1 = ["1 [1]: v4='b'", "3 [3]: v4='b'"]
|
427
|
+
exp2 = ["1 [r1]: v4='b'", "3 [r3]: v4='b'"]
|
428
|
+
res = ds.verify(t3, t1, t2)
|
429
|
+
assert_equal(exp1, res)
|
430
|
+
res = ds.verify('id', t1, t2, t3)
|
431
|
+
assert_equal(exp2, res)
|
415
432
|
end
|
416
|
-
def test_compute_operation
|
417
|
-
v1=[1,2,3,4].to_vector(:scale)
|
418
|
-
v2=[4,3,2,1].to_vector(:scale)
|
419
|
-
v3=[10,20,30,40].to_vector(:scale)
|
420
|
-
vscale=[1.quo(2),1,3.quo(2),2].to_vector(:scale)
|
421
|
-
vsum=[1+4+10.0,2+3+20.0,3+2+30.0,4+1+40.0].to_vector(:scale)
|
422
|
-
vmult=[1*4,2*3,3*2,4*1].to_vector(:scale)
|
423
|
-
ds={'v1'=>v1,'v2'=>v2,'v3'=>v3}.to_dataset
|
424
|
-
assert_equal(vscale,ds.compute("v1/2"))
|
425
|
-
assert_equal(vsum,ds.compute("v1+v2+v3"))
|
426
|
-
assert_equal(vmult,ds.compute("v1*v2"))
|
427
433
|
|
434
|
+
def test_compute_operation
|
435
|
+
v1 = [1, 2, 3, 4].to_vector(:scale)
|
436
|
+
v2 = [4, 3, 2, 1].to_vector(:scale)
|
437
|
+
v3 = [10, 20, 30, 40].to_vector(:scale)
|
438
|
+
vscale = [1.quo(2), 1, 3.quo(2), 2].to_vector(:scale)
|
439
|
+
vsum = [1 + 4 + 10.0, 2 + 3 + 20.0, 3 + 2 + 30.0, 4 + 1 + 40.0].to_vector(:scale)
|
440
|
+
vmult = [1 * 4, 2 * 3, 3 * 2, 4 * 1].to_vector(:scale)
|
441
|
+
ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3 }.to_dataset
|
442
|
+
assert_equal(vscale, ds.compute('v1/2'))
|
443
|
+
assert_equal(vsum, ds.compute('v1+v2+v3'))
|
444
|
+
assert_equal(vmult, ds.compute('v1*v2'))
|
428
445
|
end
|
446
|
+
|
429
447
|
def test_crosstab_with_asignation
|
430
|
-
v1
|
431
|
-
v2
|
432
|
-
v3
|
433
|
-
ds=Statsample::Dataset.crosstab_by_asignation(v1,v2,v3)
|
448
|
+
v1 = %w(a a a b b b c c c).to_vector
|
449
|
+
v2 = %w(a b c a b c a b c).to_vector
|
450
|
+
v3 = %w(0 1 0 0 1 1 0 0 1).to_scale
|
451
|
+
ds = Statsample::Dataset.crosstab_by_asignation(v1, v2, v3)
|
434
452
|
assert_equal(:nominal, ds['_id'].type)
|
435
453
|
assert_equal(:scale, ds['a'].type)
|
436
454
|
assert_equal(:scale, ds['b'].type)
|
437
|
-
ev_id
|
438
|
-
ev_a
|
439
|
-
ev_b
|
440
|
-
ev_c
|
441
|
-
ds2={'_id'=>ev_id, 'a'=>ev_a, 'b'=>ev_b, 'c'=>ev_c}.to_dataset
|
455
|
+
ev_id = %w(a b c).to_vector
|
456
|
+
ev_a = %w(0 0 0).to_scale
|
457
|
+
ev_b = %w(1 1 0).to_scale
|
458
|
+
ev_c = %w(0 1 1).to_scale
|
459
|
+
ds2 = { '_id' => ev_id, 'a' => ev_a, 'b' => ev_b, 'c' => ev_c }.to_dataset
|
442
460
|
assert_equal(ds, ds2)
|
443
461
|
end
|
462
|
+
|
444
463
|
def test_one_to_many
|
445
|
-
cases=[
|
446
|
-
['1','george','red',10,'blue',20,nil,nil],
|
447
|
-
['2','fred','green',15,'orange',30,'white',20],
|
448
|
-
['3','alfred',nil,nil,nil,nil,nil,nil]
|
464
|
+
cases = [
|
465
|
+
['1', 'george', 'red', 10, 'blue', 20, nil, nil],
|
466
|
+
['2', 'fred', 'green', 15, 'orange', 30, 'white', 20],
|
467
|
+
['3', 'alfred', nil, nil, nil, nil, nil, nil]
|
449
468
|
]
|
450
|
-
ds=Statsample::Dataset.new(%w
|
451
|
-
cases.each {|c| ds.add_case_array c }
|
469
|
+
ds = Statsample::Dataset.new(%w(id name car_color1 car_value1 car_color2 car_value2 car_color3 car_value3))
|
470
|
+
cases.each { |c| ds.add_case_array c }
|
452
471
|
ds.update_valid_data
|
453
|
-
ids
|
454
|
-
colors
|
455
|
-
values=[10,20,15,30,20].to_vector
|
456
|
-
col_ids=[1,2,1,2,3].to_scale
|
457
|
-
ds_expected={'id'=>ids, '_col_id'=>col_ids, 'color'=>colors, 'value'=>values}.to_dataset(
|
458
|
-
assert_equal(ds_expected, ds.one_to_many(%w
|
459
|
-
|
472
|
+
ids = %w(1 1 2 2 2).to_vector
|
473
|
+
colors = %w(red blue green orange white).to_vector
|
474
|
+
values = [10, 20, 15, 30, 20].to_vector
|
475
|
+
col_ids = [1, 2, 1, 2, 3].to_scale
|
476
|
+
ds_expected = { 'id' => ids, '_col_id' => col_ids, 'color' => colors, 'value' => values }.to_dataset(%w(id _col_id color value))
|
477
|
+
assert_equal(ds_expected, ds.one_to_many(%w(id), 'car_%v%n'))
|
460
478
|
end
|
461
|
-
|
462
479
|
end
|