statsample 1.4.1 → 1.4.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +4 -3
  3. data/History.txt +4 -0
  4. data/README.md +4 -0
  5. data/lib/statsample/converter/csv.rb +41 -54
  6. data/lib/statsample/converters.rb +18 -19
  7. data/lib/statsample/version.rb +1 -1
  8. data/test/fixtures/scientific_notation.csv +4 -0
  9. data/test/helpers_tests.rb +37 -38
  10. data/test/test_analysis.rb +96 -97
  11. data/test/test_anova_contrast.rb +22 -22
  12. data/test/test_anovaoneway.rb +12 -12
  13. data/test/test_anovatwoway.rb +16 -17
  14. data/test/test_anovatwowaywithdataset.rb +22 -24
  15. data/test/test_anovawithvectors.rb +67 -69
  16. data/test/test_awesome_print_bug.rb +9 -9
  17. data/test/test_bartlettsphericity.rb +13 -13
  18. data/test/test_bivariate.rb +122 -126
  19. data/test/test_codification.rb +51 -49
  20. data/test/test_crosstab.rb +44 -40
  21. data/test/test_csv.rb +52 -70
  22. data/test/test_dataset.rb +347 -330
  23. data/test/test_dominance_analysis.rb +22 -24
  24. data/test/test_factor.rb +163 -166
  25. data/test/test_factor_map.rb +25 -30
  26. data/test/test_factor_pa.rb +28 -28
  27. data/test/test_ggobi.rb +19 -18
  28. data/test/test_gsl.rb +13 -15
  29. data/test/test_histogram.rb +74 -77
  30. data/test/test_matrix.rb +29 -31
  31. data/test/test_multiset.rb +132 -126
  32. data/test/test_regression.rb +143 -149
  33. data/test/test_reliability.rb +149 -155
  34. data/test/test_reliability_icc.rb +100 -104
  35. data/test/test_reliability_skillscale.rb +38 -40
  36. data/test/test_resample.rb +14 -12
  37. data/test/test_rserve_extension.rb +33 -33
  38. data/test/test_srs.rb +5 -5
  39. data/test/test_statistics.rb +52 -50
  40. data/test/test_stest.rb +27 -28
  41. data/test/test_stratified.rb +10 -10
  42. data/test/test_test_f.rb +17 -17
  43. data/test/test_test_kolmogorovsmirnov.rb +21 -21
  44. data/test/test_test_t.rb +52 -52
  45. data/test/test_umannwhitney.rb +16 -16
  46. data/test/test_vector.rb +419 -410
  47. data/test/test_wilcoxonsignedrank.rb +60 -63
  48. data/test/test_xls.rb +41 -41
  49. metadata +55 -5
  50. data/web/Rakefile +0 -39
data/test/test_csv.rb CHANGED
@@ -1,81 +1,63 @@
1
- require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
2
- class StatsampleCSVTestCase < MiniTest::Unit::TestCase
1
+ require 'helpers_tests.rb'
2
+
3
+ class StatsampleCSVTestCase < Minitest::Test
3
4
  def setup
4
- @ds=Statsample::CSV.read(File.dirname(__FILE__)+"/fixtures/test_csv.csv")
5
+ @ds = Statsample::CSV.read('test/fixtures/test_csv.csv')
5
6
  end
7
+
6
8
  def test_read
7
- assert_equal(6,@ds.cases)
8
- assert_equal(%w{id name age city a1}, @ds.fields)
9
- id=[1,2,3,4,5,6].to_vector(:scale)
10
- name=["Alex","Claude","Peter","Franz","George","Fernand"].to_vector(:nominal)
11
- age=[20,23,25,27,5.5,nil].to_vector(:scale)
12
- city=["New York","London","London","Paris","Tome",nil].to_vector(:nominal)
13
- a1=["a,b","b,c","a",nil,"a,b,c",nil].to_vector(:nominal)
14
- ds_exp=Statsample::Dataset.new({'id'=>id,'name'=>name,'age'=>age,'city'=>city,'a1'=>a1}, %w{id name age city a1})
15
- ds_exp.fields.each{|f|
16
- assert_equal(ds_exp[f],@ds[f])
9
+ header = %w(id name age city a1)
10
+ data = {
11
+ 'id' => [1, 2, 3, 4, 5, 6].to_vector(:scale),
12
+ 'name' => %w(Alex Claude Peter Franz George Fernand).to_vector(:nominal),
13
+ 'age' => [20, 23, 25, 27, 5.5, nil].to_vector(:scale),
14
+ 'city' => ['New York', 'London', 'London', 'Paris', 'Tome', nil].to_vector(:nominal),
15
+ 'a1' => ['a,b', 'b,c', 'a', nil, 'a,b,c', nil].to_vector(:nominal)
17
16
  }
18
- assert_equal(ds_exp,@ds)
19
- end
17
+
18
+ ds_exp = Statsample::Dataset.new(data, header)
19
+
20
+ assert_equal(6, @ds.cases)
21
+ assert_equal(header, @ds.fields)
22
+
23
+ ds_exp.fields.each do |f|
24
+ assert_equal(ds_exp[f], @ds[f])
25
+ end
26
+
27
+ assert_equal(ds_exp, @ds)
28
+ end
29
+
20
30
  def test_nil
21
- assert_equal(nil,@ds['age'][5])
31
+ assert_equal(nil, @ds['age'][5])
22
32
  end
33
+
23
34
  def test_repeated
24
- ds=Statsample::CSV.read(File.dirname(__FILE__)+"/fixtures/repeated_fields.csv")
25
- assert_equal(%w{id name_1 age_1 city a1 name_2 age_2},ds.fields)
26
- age=[3,4,5,6,nil,8].to_vector(:scale)
27
- assert_equal(age,ds['age_2'])
28
- end
35
+ ds = Statsample::CSV.read('test/fixtures/repeated_fields.csv')
36
+ assert_equal(%w(id name_1 age_1 city a1 name_2 age_2), ds.fields)
37
+ age = [3, 4, 5, 6, nil, 8].to_vector(:scale)
38
+ assert_equal(age, ds['age_2'])
39
+ end
40
+
41
+ # Testing fix for SciRuby/statsample#19.
42
+ def test_accept_scientific_notation_as_float
43
+ ds = Statsample::CSV.read('test/fixtures/scientific_notation.csv')
44
+ assert_equal(%w(x y), ds.fields)
45
+ y = [9.629587310436753e+127, 1.9341543147883677e+129, 3.88485279048245e+130]
46
+ y.zip(ds['y']).each do |y_expected, y_ds|
47
+ assert_in_delta(y_expected, y_ds)
48
+ end
49
+
50
+ end
51
+
29
52
  def test_write
30
- filename=Tempfile.new("afile")
31
- # filename=Dir::tmpdir+"/test_write.csv"
53
+ filename = Tempfile.new('afile')
32
54
  Statsample::CSV.write(@ds, filename.path)
33
- ds2=Statsample::CSV.read(filename.path)
34
- i=0
35
- ds2.each_array{|row|
36
- assert_equal(@ds.case_as_array(i),row)
37
- i+=1
38
- }
39
- end
40
- end
41
- =begin
42
- class StatsampleCSVTestCase2 < MiniTest::Unit::TestCase
43
- def setup
44
- @ds=Statsample::CSV.read19(File.dirname(__FILE__)+"/fixtures/test_csv.csv")
45
- end
46
- def test_read
47
- assert_equal(6,@ds.cases)
48
- assert_equal(%w{id name age city a1}, @ds.fields)
49
- id=[1,2,3,4,5,6].to_vector(:scale)
50
- name=["Alex","Claude","Peter","Franz","George","Fernand"].to_vector(:nominal)
51
- age=[20,23,25,27,5.5,nil].to_vector(:scale)
52
- city=["New York","London","London","Paris","Tome",nil].to_vector(:nominal)
53
- a1=["a,b","b,c","a",nil,"a,b,c",nil].to_vector(:nominal)
54
- ds_exp=Statsample::Dataset.new({'id'=>id,'name'=>name,'age'=>age,'city'=>city,'a1'=>a1}, %w{id name age city a1})
55
- ds_exp.fields.each{|f|
56
- assert_equal(ds_exp[f],@ds[f])
57
- }
58
- assert_equal(ds_exp,@ds)
59
- end
60
- def test_nil
61
- assert_equal(nil,@ds['age'][5])
62
- end
63
- def test_repeated
64
- ds=Statsample::CSV.read19(File.dirname(__FILE__)+"/fixtures/repeated_fields.csv")
65
- assert_equal(%w{id name_1 age_1 city a1 name_2 age_2},ds.fields)
66
- age=[3,4,5,6,nil,8].to_vector(:scale)
67
- assert_equal(age,ds['age_2'])
68
- end
69
- def test_write
70
- filename=Tempfile.new("afile")
71
- # filename=Dir::tmpdir+"/test_write.csv"
72
- Statsample::CSV.write(@ds, filename.path)
73
- ds2=Statsample::CSV.read19(filename.path)
74
- i=0
75
- ds2.each_array{|row|
76
- assert_equal(@ds.case_as_array(i),row)
77
- i+=1
78
- }
55
+ ds2 = Statsample::CSV.read(filename.path)
56
+ i = 0
57
+
58
+ ds2.each_array do |row|
59
+ assert_equal(@ds.case_as_array(i), row)
60
+ i += 1
61
+ end
79
62
  end
80
63
  end
81
- =end
data/test/test_dataset.rb CHANGED
@@ -1,188 +1,199 @@
1
- require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
2
- class StatsampleDatasetTestCase < MiniTest::Unit::TestCase
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
+ class StatsampleDatasetTestCase < Minitest::Test
3
3
  def setup
4
- @ds=Statsample::Dataset.new({'id' => Statsample::Vector.new([1,2,3,4,5]), 'name'=>Statsample::Vector.new(%w{Alex Claude Peter Franz George}), 'age'=>Statsample::Vector.new([20,23,25,27,5]),
5
- 'city'=>Statsample::Vector.new(['New York','London','London','Paris','Tome']),
6
- 'a1'=>Statsample::Vector.new(['a,b','b,c','a',nil,'a,b,c'])}, ['id','name','age','city','a1'])
4
+ @ds = Statsample::Dataset.new({ 'id' => Statsample::Vector.new([1, 2, 3, 4, 5]), 'name' => Statsample::Vector.new(%w(Alex Claude Peter Franz George)), 'age' => Statsample::Vector.new([20, 23, 25, 27, 5]),
5
+ 'city' => Statsample::Vector.new(['New York', 'London', 'London', 'Paris', 'Tome']),
6
+ 'a1' => Statsample::Vector.new(['a,b', 'b,c', 'a', nil, 'a,b,c']) }, %w(id name age city a1))
7
7
  end
8
+
8
9
  def test_nest
9
- ds={
10
- 'a'=>%w{a a a b b b}.to_vector,
11
- 'b'=>%w{c c d d e e}.to_vector,
12
- 'c'=>%w{f g h i j k}.to_vector
10
+ ds = {
11
+ 'a' => %w(a a a b b b).to_vector,
12
+ 'b' => %w(c c d d e e).to_vector,
13
+ 'c' => %w(f g h i j k).to_vector
13
14
  }.to_dataset
14
- nest=ds.nest('a','b')
15
- assert_equal([{'c'=>'f'},{'c'=>'g'}], nest['a']['c'])
16
- assert_equal([{'c'=>'h'}], nest['a']['d'])
17
- assert_equal([{'c'=>'j'},{'c'=>'k'}], nest['b']['e'])
18
-
15
+ nest = ds.nest('a', 'b')
16
+ assert_equal([{ 'c' => 'f' }, { 'c' => 'g' }], nest['a']['c'])
17
+ assert_equal([{ 'c' => 'h' }], nest['a']['d'])
18
+ assert_equal([{ 'c' => 'j' }, { 'c' => 'k' }], nest['b']['e'])
19
19
  end
20
+
20
21
  def test_should_have_summary
21
- assert(@ds.summary.size>0)
22
+ assert(@ds.summary.size > 0)
22
23
  end
24
+
23
25
  def test_basic
24
- assert_equal(5,@ds.cases)
25
- assert_equal(%w{id name age city a1}, @ds.fields)
26
+ assert_equal(5, @ds.cases)
27
+ assert_equal(%w(id name age city a1), @ds.fields)
26
28
  end
29
+
27
30
  def test_saveload
28
- outfile=Tempfile.new("dataset.ds")
31
+ outfile = Tempfile.new('dataset.ds')
29
32
  @ds.save(outfile.path)
30
- a=Statsample.load(outfile.path)
31
- assert_equal(@ds,a)
33
+ a = Statsample.load(outfile.path)
34
+ assert_equal(@ds, a)
32
35
  end
36
+
33
37
  def test_gsl
34
38
  if Statsample.has_gsl?
35
- matrix=GSL::Matrix[[1,2],[3,4],[5,6]]
36
- ds=Statsample::Dataset.new('v1'=>[1,3,5].to_vector,'v2'=>[2,4,6].to_vector)
37
- assert_equal(matrix,ds.to_gsl)
39
+ matrix = GSL::Matrix[[1, 2], [3, 4], [5, 6]]
40
+ ds = Statsample::Dataset.new('v1' => [1, 3, 5].to_vector, 'v2' => [2, 4, 6].to_vector)
41
+ assert_equal(matrix, ds.to_gsl)
38
42
  else
39
- skip("Gsl needed")
43
+ skip('Gsl needed')
40
44
  end
41
45
  end
46
+
42
47
  def test_matrix
43
- matrix=Matrix[[1,2],[3,4],[5,6]]
44
- ds=Statsample::Dataset.new('v1'=>[1,3,5].to_vector,'v2'=>[2,4,6].to_vector)
45
- assert_equal(matrix,ds.to_matrix)
48
+ matrix = Matrix[[1, 2], [3, 4], [5, 6]]
49
+ ds = Statsample::Dataset.new('v1' => [1, 3, 5].to_vector, 'v2' => [2, 4, 6].to_vector)
50
+ assert_equal(matrix, ds.to_matrix)
46
51
  end
47
52
 
48
53
  def test_fields
49
- @ds.fields=%w{name a1 id age city}
50
- assert_equal(%w{name a1 id age city}, @ds.fields)
51
- @ds.fields=%w{id name age}
52
- assert_equal(%w{id name age a1 city}, @ds.fields)
54
+ @ds.fields = %w(name a1 id age city)
55
+ assert_equal(%w(name a1 id age city), @ds.fields)
56
+ @ds.fields = %w(id name age)
57
+ assert_equal(%w(id name age a1 city), @ds.fields)
53
58
  end
59
+
54
60
  def test_merge
55
- a=[1,2,3].to_scale
56
- b=[3,4,5].to_vector
57
- c=[4,5,6].to_scale
58
- d=[7,8,9].to_vector
59
- e=[10,20,30].to_vector
60
- ds1={'a'=>a,'b'=>b}.to_dataset
61
- ds2={'c'=>c,'d'=>d}.to_dataset
62
- exp={'a'=>a,'b'=>b,'c'=>c,'d'=>d}.to_dataset
63
-
64
- assert_equal(exp,ds1.merge(ds2))
65
- exp.fields=%w{c d a b}
66
- assert_equal(exp,ds2.merge(ds1))
67
- ds3={'a'=>e}.to_dataset
68
- exp={'a_1'=>a,'b'=>b,'a_2'=>e}.to_dataset
69
- exp.fields=%w{a_1 b a_2}
70
- assert_equal(exp,ds1.merge(ds3))
61
+ a = [1, 2, 3].to_scale
62
+ b = [3, 4, 5].to_vector
63
+ c = [4, 5, 6].to_scale
64
+ d = [7, 8, 9].to_vector
65
+ e = [10, 20, 30].to_vector
66
+ ds1 = { 'a' => a, 'b' => b }.to_dataset
67
+ ds2 = { 'c' => c, 'd' => d }.to_dataset
68
+ exp = { 'a' => a, 'b' => b, 'c' => c, 'd' => d }.to_dataset
69
+
70
+ assert_equal(exp, ds1.merge(ds2))
71
+ exp.fields = %w(c d a b)
72
+ assert_equal(exp, ds2.merge(ds1))
73
+ ds3 = { 'a' => e }.to_dataset
74
+ exp = { 'a_1' => a, 'b' => b, 'a_2' => e }.to_dataset
75
+ exp.fields = %w(a_1 b a_2)
76
+ assert_equal(exp, ds1.merge(ds3))
71
77
  end
78
+
72
79
  def test_each_vector
73
- a=[1,2,3].to_vector
74
- b=[3,4,5].to_vector
75
- fields=["a","b"]
76
- ds=Statsample::Dataset.new({'a'=>a,'b'=>b},fields)
77
- res=[]
78
- ds.each_vector{|k,v|
79
- res.push([k,v])
80
+ a = [1, 2, 3].to_vector
81
+ b = [3, 4, 5].to_vector
82
+ fields = %w(a b)
83
+ ds = Statsample::Dataset.new({ 'a' => a, 'b' => b }, fields)
84
+ res = []
85
+ ds.each_vector{|k, v|
86
+ res.push([k, v])
80
87
  }
81
- assert_equal([["a",a],["b",b]],res)
82
- ds.fields=["b","a"]
83
- res=[]
84
- ds.each_vector{|k,v|
85
- res.push([k,v])
88
+ assert_equal([['a', a], ['b', b]], res)
89
+ ds.fields = %w(b a)
90
+ res = []
91
+ ds.each_vector{|k, v|
92
+ res.push([k, v])
86
93
  }
87
- assert_equal([["b",b],["a",a]],res)
94
+ assert_equal([['b', b], ['a', a]], res)
88
95
  end
96
+
89
97
  def test_equality
90
- v1=[1,2,3,4].to_vector
91
- v2=[5,6,7,8].to_vector
92
- ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2}, %w{v2 v1})
93
- v3=[1,2,3,4].to_vector
94
- v4=[5,6,7,8].to_vector
95
- ds2=Statsample::Dataset.new({'v1'=>v3,'v2'=>v4}, %w{v2 v1})
96
- assert_equal(ds1,ds2)
97
- ds2.fields=%w{v1 v2}
98
- assert_not_equal(ds1,ds2)
98
+ v1 = [1, 2, 3, 4].to_vector
99
+ v2 = [5, 6, 7, 8].to_vector
100
+ ds1 = Statsample::Dataset.new({ 'v1' => v1, 'v2' => v2 }, %w(v2 v1))
101
+ v3 = [1, 2, 3, 4].to_vector
102
+ v4 = [5, 6, 7, 8].to_vector
103
+ ds2 = Statsample::Dataset.new({ 'v1' => v3, 'v2' => v4 }, %w(v2 v1))
104
+ assert_equal(ds1, ds2)
105
+ ds2.fields = %w(v1 v2)
106
+ assert_not_equal(ds1, ds2)
99
107
  end
108
+
100
109
  def test_add_vector
101
- v=Statsample::Vector.new(%w{a b c d e})
102
- @ds.add_vector('new',v)
103
- assert_equal(%w{id name age city a1 new},@ds.fields)
104
- x=Statsample::Vector.new(%w{a b c d e f g})
110
+ v = Statsample::Vector.new(%w(a b c d e))
111
+ @ds.add_vector('new', v)
112
+ assert_equal(%w(id name age city a1 new), @ds.fields)
113
+ x = Statsample::Vector.new(%w(a b c d e f g))
105
114
  assert_raise ArgumentError do
106
- @ds.add_vector('new2',x)
115
+ @ds.add_vector('new2', x)
107
116
  end
108
117
  end
118
+
109
119
  def test_vector_by_calculation
110
- a1=[1,2,3,4,5,6,7].to_vector(:scale)
111
- a2=[10,20,30,40,50,60,70].to_vector(:scale)
112
- a3=[100,200,300,400,500,600,700].to_vector(:scale)
113
- ds={'a1'=>a1,'a2'=>a2,'a3'=>a3}.to_dataset
114
- total=ds.vector_by_calculation() {|row|
115
- row['a1']+row['a2']+row['a3']
120
+ a1 = [1, 2, 3, 4, 5, 6, 7].to_vector(:scale)
121
+ a2 = [10, 20, 30, 40, 50, 60, 70].to_vector(:scale)
122
+ a3 = [100, 200, 300, 400, 500, 600, 700].to_vector(:scale)
123
+ ds = { 'a1' => a1, 'a2' => a2, 'a3' => a3 }.to_dataset
124
+ total = ds.vector_by_calculation {|row|
125
+ row['a1'] + row['a2'] + row['a3']
116
126
  }
117
- expected=[111,222,333,444,555,666,777].to_vector(:scale)
118
- assert_equal(expected,total)
127
+ expected = [111, 222, 333, 444, 555, 666, 777].to_vector(:scale)
128
+ assert_equal(expected, total)
119
129
  end
130
+
120
131
  def test_vector_sum
121
- a1=[1 ,2 ,3 ,4 , 5,nil].to_vector(:scale)
122
- a2=[10 ,10,20,20 ,20,30].to_vector(:scale)
123
- b1=[nil,1 ,1 ,1 ,1 ,2].to_vector(:scale)
124
- b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
125
- ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2}.to_dataset
126
- total=ds.vector_sum
127
- a=ds.vector_sum(['a1','a2'])
128
- b=ds.vector_sum(['b1','b2'])
129
- expected_a=[11,12,23,24,25,nil].to_vector(:scale)
130
- expected_b=[nil,3,3,nil,3,5].to_vector(:scale)
131
- expected_total=[nil,15,26,nil,28,nil].to_vector(:scale)
132
+ a1 = [1, 2, 3, 4, 5, nil].to_vector(:scale)
133
+ a2 = [10, 10, 20, 20, 20, 30].to_vector(:scale)
134
+ b1 = [nil, 1, 1, 1, 1, 2].to_vector(:scale)
135
+ b2 = [2, 2, 2, nil, 2, 3].to_vector(:scale)
136
+ ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2 }.to_dataset
137
+ total = ds.vector_sum
138
+ a = ds.vector_sum(%w(a1 a2))
139
+ b = ds.vector_sum(%w(b1 b2))
140
+ expected_a = [11, 12, 23, 24, 25, nil].to_vector(:scale)
141
+ expected_b = [nil, 3, 3, nil, 3, 5].to_vector(:scale)
142
+ expected_total = [nil, 15, 26, nil, 28, nil].to_vector(:scale)
132
143
  assert_equal(expected_a, a)
133
144
  assert_equal(expected_b, b)
134
145
  assert_equal(expected_total, total)
135
146
  end
147
+
136
148
  def test_vector_missing_values
137
- a1=[1 ,nil ,3 ,4 , 5,nil].to_vector(:scale)
138
- a2=[10 ,nil ,20,20 ,20,30].to_vector(:scale)
139
- b1=[nil,nil ,1 ,1 ,1 ,2].to_vector(:scale)
140
- b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
141
- c= [nil,2 , 4,2 ,2 ,2].to_vector(:scale)
142
- ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
143
- mva=[2,3,0,1,0,1].to_vector(:scale)
144
- assert_equal(mva,ds.vector_missing_values)
145
- end
146
-
149
+ a1 = [1, nil, 3, 4, 5, nil].to_vector(:scale)
150
+ a2 = [10, nil, 20, 20, 20, 30].to_vector(:scale)
151
+ b1 = [nil, nil, 1, 1, 1, 2].to_vector(:scale)
152
+ b2 = [2, 2, 2, nil, 2, 3].to_vector(:scale)
153
+ c = [nil, 2, 4, 2, 2, 2].to_vector(:scale)
154
+ ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
155
+ mva = [2, 3, 0, 1, 0, 1].to_vector(:scale)
156
+ assert_equal(mva, ds.vector_missing_values)
157
+ end
158
+
147
159
  def test_has_missing_values
148
- a1=[1 ,nil ,3 ,4 , 5,nil].to_vector(:scale)
149
- a2=[10 ,nil ,20,20 ,20,30].to_vector(:scale)
150
- b1=[nil,nil ,1 ,1 ,1 ,2].to_vector(:scale)
151
- b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
152
- c= [nil,2 , 4,2 ,2 ,2].to_vector(:scale)
153
- ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
160
+ a1 = [1, nil, 3, 4, 5, nil].to_vector(:scale)
161
+ a2 = [10, nil, 20, 20, 20, 30].to_vector(:scale)
162
+ b1 = [nil, nil, 1, 1, 1, 2].to_vector(:scale)
163
+ b2 = [2, 2, 2, nil, 2, 3].to_vector(:scale)
164
+ c = [nil, 2, 4, 2, 2, 2].to_vector(:scale)
165
+ ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
154
166
  assert(ds.has_missing_data?)
155
- clean=ds.dup_only_valid
167
+ clean = ds.dup_only_valid
156
168
  assert(!clean.has_missing_data?)
157
169
  end
158
-
159
-
160
- def test_vector_count_characters
161
- a1=[1 ,"abcde" ,3 ,4 , 5,nil].to_vector(:scale)
162
- a2=[10 ,20.3 ,20 ,20 ,20,30].to_vector(:scale)
163
- b1=[nil,"343434" ,1 ,1 ,1 ,2].to_vector(:scale)
164
- b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
165
- c= [nil,2 ,"This is a nice example",2 ,2 ,2].to_vector(:scale)
166
- ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
167
- exp=[4,17,27,5,6,5].to_vector(:scale)
168
- assert_equal(exp,ds.vector_count_characters)
169
170
 
171
+ def test_vector_count_characters
172
+ a1 = [1, 'abcde', 3, 4, 5, nil].to_vector(:scale)
173
+ a2 = [10, 20.3, 20, 20, 20, 30].to_vector(:scale)
174
+ b1 = [nil, '343434', 1, 1, 1, 2].to_vector(:scale)
175
+ b2 = [2, 2, 2, nil, 2, 3].to_vector(:scale)
176
+ c = [nil, 2, 'This is a nice example', 2, 2, 2].to_vector(:scale)
177
+ ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
178
+ exp = [4, 17, 27, 5, 6, 5].to_vector(:scale)
179
+ assert_equal(exp, ds.vector_count_characters)
170
180
  end
181
+
171
182
  def test_vector_mean
172
- a1=[1 ,2 ,3 ,4 , 5,nil].to_vector(:scale)
173
- a2=[10 ,10,20,20 ,20,30].to_vector(:scale)
174
- b1=[nil,1 ,1 ,1 ,1 ,2].to_vector(:scale)
175
- b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
176
- c= [nil,2, 4,2 ,2 ,2].to_vector(:scale)
177
- ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
178
- total=ds.vector_mean
179
- a=ds.vector_mean(['a1','a2'],1)
180
- b=ds.vector_mean(['b1','b2'],1)
181
- c=ds.vector_mean(['b1','b2','c'],1)
182
- expected_a=[5.5,6,11.5,12,12.5,30].to_vector(:scale)
183
- expected_b=[2,1.5,1.5,1,1.5,2.5].to_vector(:scale)
184
- expected_c=[nil, 5.0/3,7.0/3,1.5,5.0/3,7.0/3].to_vector(:scale)
185
- expected_total=[nil,3.4,6,nil,6.0,nil].to_vector(:scale)
183
+ a1 = [1, 2, 3, 4, 5, nil].to_vector(:scale)
184
+ a2 = [10, 10, 20, 20, 20, 30].to_vector(:scale)
185
+ b1 = [nil, 1, 1, 1, 1, 2].to_vector(:scale)
186
+ b2 = [2, 2, 2, nil, 2, 3].to_vector(:scale)
187
+ c = [nil, 2, 4, 2, 2, 2].to_vector(:scale)
188
+ ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
189
+ total = ds.vector_mean
190
+ a = ds.vector_mean(%w(a1 a2), 1)
191
+ b = ds.vector_mean(%w(b1 b2), 1)
192
+ c = ds.vector_mean(%w(b1 b2 c), 1)
193
+ expected_a = [5.5, 6, 11.5, 12, 12.5, 30].to_vector(:scale)
194
+ expected_b = [2, 1.5, 1.5, 1, 1.5, 2.5].to_vector(:scale)
195
+ expected_c = [nil, 5.0 / 3, 7.0 / 3, 1.5, 5.0 / 3, 7.0 / 3].to_vector(:scale)
196
+ expected_total = [nil, 3.4, 6, nil, 6.0, nil].to_vector(:scale)
186
197
  assert_equal(expected_a, a)
187
198
  assert_equal(expected_b, b)
188
199
  assert_equal(expected_c, c)
@@ -190,273 +201,279 @@ class StatsampleDatasetTestCase < MiniTest::Unit::TestCase
190
201
  end
191
202
 
192
203
  def test_each_array
193
- expected=[[1,'Alex',20,'New York','a,b'], [2,'Claude',23,'London','b,c'], [3,'Peter',25,'London','a'],[4,'Franz', 27,'Paris',nil],[5,'George',5,'Tome','a,b,c']]
194
- out=[]
204
+ expected = [[1, 'Alex', 20, 'New York', 'a,b'], [2, 'Claude', 23, 'London', 'b,c'], [3, 'Peter', 25, 'London', 'a'], [4, 'Franz', 27, 'Paris', nil], [5, 'George', 5, 'Tome', 'a,b,c']]
205
+ out = []
195
206
  @ds.each_array{ |a|
196
207
  out.push(a)
197
208
  }
198
- assert_equal(expected,out)
209
+ assert_equal(expected, out)
199
210
  end
211
+
200
212
  def test_recode
201
- @ds['age'].type=:scale
202
- @ds.recode!("age") {|c| c['id']*2}
203
- expected=[2,4,6,8,10].to_vector(:scale)
204
- assert_equal(expected,@ds['age'])
213
+ @ds['age'].type = :scale
214
+ @ds.recode!('age') { |c| c['id'] * 2 }
215
+ expected = [2, 4, 6, 8, 10].to_vector(:scale)
216
+ assert_equal(expected, @ds['age'])
205
217
  end
218
+
206
219
  def test_case_as
207
- assert_equal({'id'=>1,'name'=>'Alex','city'=>'New York','age'=>20,'a1'=>'a,b'},@ds.case_as_hash(0))
208
- assert_equal([5,'George',5,'Tome','a,b,c'],@ds.case_as_array(4))
220
+ assert_equal({ 'id' => 1, 'name' => 'Alex', 'city' => 'New York', 'age' => 20, 'a1' => 'a,b' }, @ds.case_as_hash(0))
221
+ assert_equal([5, 'George', 5, 'Tome', 'a,b,c'], @ds.case_as_array(4))
209
222
  # Native methods
210
- assert_equal({'id'=>1,'name'=>'Alex','city'=>'New York','age'=>20,'a1'=>'a,b'},@ds._case_as_hash(0))
211
- assert_equal([5,'George',5,'Tome','a,b,c'],@ds._case_as_array(4))
212
-
213
-
214
-
223
+ assert_equal({ 'id' => 1, 'name' => 'Alex', 'city' => 'New York', 'age' => 20, 'a1' => 'a,b' }, @ds._case_as_hash(0))
224
+ assert_equal([5, 'George', 5, 'Tome', 'a,b,c'], @ds._case_as_array(4))
215
225
  end
226
+
216
227
  def test_delete_vector
217
228
  @ds.delete_vector('name')
218
- assert_equal(%w{id age city a1},@ds.fields)
219
- assert_equal(%w{a1 age city id},@ds.vectors.keys.sort)
229
+ assert_equal(%w(id age city a1), @ds.fields)
230
+ assert_equal(%w(a1 age city id), @ds.vectors.keys.sort)
220
231
  end
232
+
221
233
  def test_change_type
222
- @ds.col('age').type=:scale
223
- assert_equal(:scale,@ds.col('age').type)
234
+ @ds.col('age').type = :scale
235
+ assert_equal(:scale, @ds.col('age').type)
224
236
  end
237
+
225
238
  def test_split_by_separator_recode
226
- @ds.add_vectors_by_split_recode("a1","_")
227
- assert_equal(%w{id name age city a1 a1_1 a1_2 a1_3},@ds.fields)
228
- assert_equal([1,0,1,nil,1],@ds.col('a1_1').to_a)
229
- assert_equal([1,1,0,nil,1],@ds.col('a1_2').to_a)
230
- assert_equal([0,1,0,nil,1],@ds.col('a1_3').to_a)
231
- {'a1_1'=>'a1:a', 'a1_2'=>'a1:b', 'a1_3'=>'a1:c'}.each do |k,v|
239
+ @ds.add_vectors_by_split_recode('a1', '_')
240
+ assert_equal(%w(id name age city a1 a1_1 a1_2 a1_3), @ds.fields)
241
+ assert_equal([1, 0, 1, nil, 1], @ds.col('a1_1').to_a)
242
+ assert_equal([1, 1, 0, nil, 1], @ds.col('a1_2').to_a)
243
+ assert_equal([0, 1, 0, nil, 1], @ds.col('a1_3').to_a)
244
+ { 'a1_1' => 'a1:a', 'a1_2' => 'a1:b', 'a1_3' => 'a1:c' }.each do |k, v|
232
245
  assert_equal(v, @ds[k].name)
233
246
  end
234
247
  end
248
+
235
249
  def test_split_by_separator
236
- @ds.add_vectors_by_split("a1","_")
237
- assert_equal(%w{id name age city a1 a1_a a1_b a1_c},@ds.fields)
238
- assert_equal([1,0,1,nil,1],@ds.col('a1_a').to_a)
239
- assert_equal([1,1,0,nil,1],@ds.col('a1_b').to_a)
240
- assert_equal([0,1,0,nil,1],@ds.col('a1_c').to_a)
250
+ @ds.add_vectors_by_split('a1', '_')
251
+ assert_equal(%w(id name age city a1 a1_a a1_b a1_c), @ds.fields)
252
+ assert_equal([1, 0, 1, nil, 1], @ds.col('a1_a').to_a)
253
+ assert_equal([1, 1, 0, nil, 1], @ds.col('a1_b').to_a)
254
+ assert_equal([0, 1, 0, nil, 1], @ds.col('a1_c').to_a)
241
255
  end
256
+
242
257
  def test_percentiles
243
- v1=(1..100).to_a.to_scale
244
- assert_equal(50.5,v1.median)
258
+ v1 = (1..100).to_a.to_scale
259
+ assert_equal(50.5, v1.median)
245
260
  assert_equal(25.5, v1.percentil(25))
246
- v2=(1..99).to_a.to_scale
247
- assert_equal(50,v2.median)
248
- assert_equal(25,v2.percentil(25))
249
- v3=(1..50).to_a.to_scale
261
+ v2 = (1..99).to_a.to_scale
262
+ assert_equal(50, v2.median)
263
+ assert_equal(25, v2.percentil(25))
264
+ v3 = (1..50).to_a.to_scale
250
265
  assert_equal(25.5, v3.median)
251
266
  assert_equal(13, v3.percentil(25))
252
-
253
267
  end
268
+
254
269
  def test_add_case
255
- ds=Statsample::Dataset.new({'a'=>[].to_vector, 'b'=>[].to_vector, 'c'=>[].to_vector})
256
- ds.add_case([1,2,3])
257
- ds.add_case({'a'=>4,'b'=>5,'c'=>6})
258
- ds.add_case([[7,8,9],%w{a b c}])
259
- assert_equal({'a'=>1,'b'=>2,'c'=>3},ds.case_as_hash(0))
260
- assert_equal([4,5,6],ds.case_as_array(1))
261
- assert_equal([7,8,9],ds.case_as_array(2))
262
- assert_equal(['a','b','c'],ds.case_as_array(3))
263
- ds.add_case_array([6,7,1])
270
+ ds = Statsample::Dataset.new('a' => [].to_vector, 'b' => [].to_vector, 'c' => [].to_vector)
271
+ ds.add_case([1, 2, 3])
272
+ ds.add_case('a' => 4, 'b' => 5, 'c' => 6)
273
+ ds.add_case([[7, 8, 9], %w(a b c)])
274
+ assert_equal({ 'a' => 1, 'b' => 2, 'c' => 3 }, ds.case_as_hash(0))
275
+ assert_equal([4, 5, 6], ds.case_as_array(1))
276
+ assert_equal([7, 8, 9], ds.case_as_array(2))
277
+ assert_equal(%w(a b c), ds.case_as_array(3))
278
+ ds.add_case_array([6, 7, 1])
264
279
  ds.update_valid_data
265
- assert_equal([6,7,1],ds.case_as_array(4))
266
-
280
+ assert_equal([6, 7, 1], ds.case_as_array(4))
267
281
  end
282
+
268
283
  def test_marshaling
269
- ds_marshal=Marshal.load(Marshal.dump(@ds))
270
- assert_equal(ds_marshal,@ds)
284
+ ds_marshal = Marshal.load(Marshal.dump(@ds))
285
+ assert_equal(ds_marshal, @ds)
271
286
  end
272
- def test_range
273
- v1=[1,2,3,4].to_vector
274
- v2=[5,6,7,8].to_vector
275
- v3=[9,10,11,12].to_vector
276
- ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2,'v3'=>v3}, %w{v3 v2 v1})
277
- assert_same(v1,ds1['v1'])
278
- ds2=ds1["v2".."v1"]
279
- assert_equal(%w{v2 v1},ds2.fields)
280
- assert_same(ds1['v1'],ds2['v1'])
281
- assert_same(ds1['v2'],ds2['v2'])
282
-
283
287
 
288
+ def test_range
289
+ v1 = [1, 2, 3, 4].to_vector
290
+ v2 = [5, 6, 7, 8].to_vector
291
+ v3 = [9, 10, 11, 12].to_vector
292
+ ds1 = Statsample::Dataset.new({ 'v1' => v1, 'v2' => v2, 'v3' => v3 }, %w(v3 v2 v1))
293
+ assert_same(v1, ds1['v1'])
294
+ ds2 = ds1['v2'..'v1']
295
+ assert_equal(%w(v2 v1), ds2.fields)
296
+ assert_same(ds1['v1'], ds2['v1'])
297
+ assert_same(ds1['v2'], ds2['v2'])
284
298
  end
299
+
285
300
  def test_clone
286
- v1=[1,2,3,4].to_vector
287
- v2=[5,6,7,8].to_vector
288
- ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2}, %w{v2 v1})
289
- ds2=ds1.clone
290
- assert_equal(ds1,ds2)
291
- assert_not_same(ds1,ds2)
292
- assert_equal(ds1['v1'],ds2['v1'])
301
+ v1 = [1, 2, 3, 4].to_vector
302
+ v2 = [5, 6, 7, 8].to_vector
303
+ ds1 = Statsample::Dataset.new({ 'v1' => v1, 'v2' => v2 }, %w(v2 v1))
304
+ ds2 = ds1.clone
305
+ assert_equal(ds1, ds2)
306
+ assert_not_same(ds1, ds2)
307
+ assert_equal(ds1['v1'], ds2['v1'])
293
308
  assert_same(ds1['v1'], ds2['v1'])
294
- assert_equal(ds1.fields,ds2.fields)
295
- assert_not_same(ds1.fields,ds2.fields)
296
- assert_equal(ds1.cases,ds2.cases)
309
+ assert_equal(ds1.fields, ds2.fields)
310
+ assert_not_same(ds1.fields, ds2.fields)
311
+ assert_equal(ds1.cases, ds2.cases)
297
312
 
298
313
  # partial clone
299
- ds3=ds1.clone('v1')
300
- ds_exp=Statsample::Dataset.new({'v1'=>v1},%w{v1})
301
- assert_equal(ds_exp,ds3)
302
- assert_not_same(ds_exp,ds3)
303
- assert_equal(ds3['v1'],ds_exp['v1'])
304
- assert_same(ds3['v1'],ds_exp['v1'])
305
- assert_equal(ds3.fields,ds_exp.fields)
306
- assert_equal(ds3.cases,ds_exp.cases)
307
-
308
- assert_not_same(ds3.fields,ds_exp.fields)
309
-
314
+ ds3 = ds1.clone('v1')
315
+ ds_exp = Statsample::Dataset.new({ 'v1' => v1 }, %w(v1))
316
+ assert_equal(ds_exp, ds3)
317
+ assert_not_same(ds_exp, ds3)
318
+ assert_equal(ds3['v1'], ds_exp['v1'])
319
+ assert_same(ds3['v1'], ds_exp['v1'])
320
+ assert_equal(ds3.fields, ds_exp.fields)
321
+ assert_equal(ds3.cases, ds_exp.cases)
322
+
323
+ assert_not_same(ds3.fields, ds_exp.fields)
310
324
  end
325
+
311
326
  def test_dup
312
- v1=[1,2,3,4].to_vector
313
- v2=[5,6,7,8].to_vector
314
- ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2}, %w{v2 v1})
315
- ds2=ds1.dup
316
- assert_equal(ds1,ds2)
317
- assert_not_same(ds1,ds2)
318
- assert_equal(ds1['v1'],ds2['v1'])
319
- assert_not_same(ds1['v1'],ds2['v1'])
320
- assert_equal(ds1.cases,ds2.cases)
321
-
322
- assert_equal(ds1.fields,ds2.fields)
323
- assert_not_same(ds1.fields,ds2.fields)
324
- ds1['v1'].type=:scale
325
- # dup partial
326
- ds3=ds1.dup('v1')
327
- ds_exp=Statsample::Dataset.new({'v1'=>v1},%w{v1})
328
- assert_equal(ds_exp,ds3)
329
- assert_not_same(ds_exp,ds3)
330
- assert_equal(ds3['v1'],ds_exp['v1'])
331
- assert_not_same(ds3['v1'],ds_exp['v1'])
332
- assert_equal(ds3.fields,ds_exp.fields)
333
- assert_equal(ds3.cases,ds_exp.cases)
327
+ v1 = [1, 2, 3, 4].to_vector
328
+ v2 = [5, 6, 7, 8].to_vector
329
+ ds1 = Statsample::Dataset.new({ 'v1' => v1, 'v2' => v2 }, %w(v2 v1))
330
+ ds2 = ds1.dup
331
+ assert_equal(ds1, ds2)
332
+ assert_not_same(ds1, ds2)
333
+ assert_equal(ds1['v1'], ds2['v1'])
334
+ assert_not_same(ds1['v1'], ds2['v1'])
335
+ assert_equal(ds1.cases, ds2.cases)
334
336
 
335
- assert_not_same(ds3.fields,ds_exp.fields)
337
+ assert_equal(ds1.fields, ds2.fields)
338
+ assert_not_same(ds1.fields, ds2.fields)
339
+ ds1['v1'].type = :scale
340
+ # dup partial
341
+ ds3 = ds1.dup('v1')
342
+ ds_exp = Statsample::Dataset.new({ 'v1' => v1 }, %w(v1))
343
+ assert_equal(ds_exp, ds3)
344
+ assert_not_same(ds_exp, ds3)
345
+ assert_equal(ds3['v1'], ds_exp['v1'])
346
+ assert_not_same(ds3['v1'], ds_exp['v1'])
347
+ assert_equal(ds3.fields, ds_exp.fields)
348
+ assert_equal(ds3.cases, ds_exp.cases)
336
349
 
350
+ assert_not_same(ds3.fields, ds_exp.fields)
337
351
 
338
352
  # empty
339
- ds3=ds1.dup_empty
340
- assert_not_equal(ds1,ds3)
341
- assert_not_equal(ds1['v1'],ds3['v1'])
342
- assert_equal([],ds3['v1'].data)
343
- assert_equal([],ds3['v2'].data)
344
- assert_equal(:scale,ds3['v1'].type)
345
- assert_equal(ds1.fields,ds2.fields)
346
- assert_not_same(ds1.fields,ds2.fields)
353
+ ds3 = ds1.dup_empty
354
+ assert_not_equal(ds1, ds3)
355
+ assert_not_equal(ds1['v1'], ds3['v1'])
356
+ assert_equal([], ds3['v1'].data)
357
+ assert_equal([], ds3['v2'].data)
358
+ assert_equal(:scale, ds3['v1'].type)
359
+ assert_equal(ds1.fields, ds2.fields)
360
+ assert_not_same(ds1.fields, ds2.fields)
347
361
  end
362
+
348
363
  def test_from_to
349
- assert_equal(%w{name age city}, @ds.from_to("name","city"))
364
+ assert_equal(%w(name age city), @ds.from_to('name', 'city'))
350
365
  assert_raise ArgumentError do
351
- @ds.from_to("name","a2")
366
+ @ds.from_to('name', 'a2')
352
367
  end
353
368
  end
369
+
354
370
  def test_each_array_with_nils
355
- v1=[1,-99,3,4,"na"].to_vector(:scale,:missing_values=>[-99,"na"])
356
- v2=[5,6,-99,8,20].to_vector(:scale,:missing_values=>[-99])
357
- v3=[9,10,11,12,20].to_vector(:scale,:missing_values=>[-99])
358
- ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2,'v3'=>v3})
359
- ds2=ds1.dup_empty
371
+ v1 = [1, -99, 3, 4, 'na'].to_vector(:scale, missing_values: [-99, 'na'])
372
+ v2 = [5, 6, -99, 8, 20].to_vector(:scale, missing_values: [-99])
373
+ v3 = [9, 10, 11, 12, 20].to_vector(:scale, missing_values: [-99])
374
+ ds1 = Statsample::Dataset.new('v1' => v1, 'v2' => v2, 'v3' => v3)
375
+ ds2 = ds1.dup_empty
360
376
  ds1.each_array_with_nils {|row|
361
377
  ds2.add_case_array(row)
362
378
  }
363
379
  ds2.update_valid_data
364
- assert_equal([1,nil,3,4,nil],ds2['v1'].data)
365
- assert_equal([5,6,nil,8,20],ds2['v2'].data)
380
+ assert_equal([1, nil, 3, 4, nil], ds2['v1'].data)
381
+ assert_equal([5, 6, nil, 8, 20], ds2['v2'].data)
366
382
  end
383
+
367
384
  def test_dup_only_valid
368
- v1=[1,nil,3,4].to_vector(:scale)
369
- v2=[5,6,nil,8].to_vector(:scale)
370
- v3=[9,10,11,12].to_vector(:scale)
371
- ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2,'v3'=>v3})
372
- ds2=ds1.dup_only_valid
373
- expected=Statsample::Dataset.new({'v1'=>[1,4].to_vector(:scale), 'v2'=> [5,8].to_vector(:scale), 'v3'=>[9, 12].to_vector(:scale)})
374
- assert_equal(expected,ds2)
375
- assert_equal(expected.vectors.values,Statsample::only_valid(v1,v2,v3))
376
- expected_partial=Statsample::Dataset.new({'v1'=>[1,3,4].to_vector(:scale), 'v3'=>[9, 11,12].to_vector(:scale)})
377
- assert_equal(expected_partial, ds1.dup_only_valid(%w{v1 v3}))
378
-
379
-
385
+ v1 = [1, nil, 3, 4].to_vector(:scale)
386
+ v2 = [5, 6, nil, 8].to_vector(:scale)
387
+ v3 = [9, 10, 11, 12].to_vector(:scale)
388
+ ds1 = Statsample::Dataset.new('v1' => v1, 'v2' => v2, 'v3' => v3)
389
+ ds2 = ds1.dup_only_valid
390
+ expected = Statsample::Dataset.new('v1' => [1, 4].to_vector(:scale), 'v2' => [5, 8].to_vector(:scale), 'v3' => [9, 12].to_vector(:scale))
391
+ assert_equal(expected, ds2)
392
+ assert_equal(expected.vectors.values, Statsample.only_valid(v1, v2, v3))
393
+ expected_partial = Statsample::Dataset.new('v1' => [1, 3, 4].to_vector(:scale), 'v3' => [9, 11, 12].to_vector(:scale))
394
+ assert_equal(expected_partial, ds1.dup_only_valid(%w(v1 v3)))
380
395
  end
396
+
381
397
  def test_filter
382
- @ds['age'].type=:scale
383
- filtered=@ds.filter{|c| c['id']==2 or c['id']==4}
384
- expected=Statsample::Dataset.new({'id' => Statsample::Vector.new([2,4]), 'name'=>Statsample::Vector.new(%w{Claude Franz}), 'age'=>Statsample::Vector.new([23,27],:scale),
385
- 'city'=>Statsample::Vector.new(['London','Paris']),
386
- 'a1'=>Statsample::Vector.new(['b,c',nil,])}, ['id','name','age','city','a1'])
387
- assert_equal(expected,filtered)
398
+ @ds['age'].type = :scale
399
+ filtered = @ds.filter { |c| c['id'] == 2 or c['id'] == 4 }
400
+ expected = Statsample::Dataset.new({ 'id' => Statsample::Vector.new([2, 4]), 'name' => Statsample::Vector.new(%w(Claude Franz)), 'age' => Statsample::Vector.new([23, 27], :scale),
401
+ 'city' => Statsample::Vector.new(%w(London Paris)),
402
+ 'a1' => Statsample::Vector.new(['b,c', nil]) }, %w(id name age city a1))
403
+ assert_equal(expected, filtered)
388
404
  end
389
- def test_filter_field
390
- @ds['age'].type=:scale
391
- filtered=@ds.filter_field('id') {|c| c['id']==2 or c['id']==4}
392
- expected=[2,4].to_vector
393
- assert_equal(expected,filtered)
394
405
 
406
+ def test_filter_field
407
+ @ds['age'].type = :scale
408
+ filtered = @ds.filter_field('id') { |c| c['id'] == 2 or c['id'] == 4 }
409
+ expected = [2, 4].to_vector
410
+ assert_equal(expected, filtered)
395
411
  end
412
+
396
413
  def test_verify
397
- name=%w{r1 r2 r3 r4}.to_vector(:nominal)
398
- v1=[1,2,3,4].to_vector(:scale)
399
- v2=[4,3,2,1].to_vector(:scale)
400
- v3=[10,20,30,40].to_vector(:scale)
401
- v4=%w{a b a b}.to_vector(:nominal)
402
- ds={'v1'=>v1,'v2'=>v2,'v3'=>v3,'v4'=>v4,'id'=>name}.to_dataset
403
- ds.fields=%w{v1 v2 v3 v4 id}
404
- #Correct
405
- t1=create_test("If v4=a, v1 odd") {|r| r['v4']=='b' or (r['v4']=='a' and r['v1']%2==1)}
406
- t2=create_test("v3=v1*10") {|r| r['v3']==r['v1']*10}
414
+ name = %w(r1 r2 r3 r4).to_vector(:nominal)
415
+ v1 = [1, 2, 3, 4].to_vector(:scale)
416
+ v2 = [4, 3, 2, 1].to_vector(:scale)
417
+ v3 = [10, 20, 30, 40].to_vector(:scale)
418
+ v4 = %w(a b a b).to_vector(:nominal)
419
+ ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4, 'id' => name }.to_dataset
420
+ ds.fields = %w(v1 v2 v3 v4 id)
421
+ # Correct
422
+ t1 = create_test('If v4=a, v1 odd') { |r| r['v4'] == 'b' or (r['v4'] == 'a' and r['v1'].odd?) }
423
+ t2 = create_test('v3=v1*10') { |r| r['v3'] == r['v1'] * 10 }
407
424
  # Fail!
408
- t3=create_test("v4='b'") {|r| r['v4']=='b'}
409
- exp1=["1 [1]: v4='b'", "3 [3]: v4='b'"]
410
- exp2=["1 [r1]: v4='b'", "3 [r3]: v4='b'"]
411
- res=ds.verify(t3,t1,t2)
412
- assert_equal(exp1,res)
413
- res=ds.verify('id',t1,t2,t3)
414
- assert_equal(exp2,res)
425
+ t3 = create_test("v4='b'") { |r| r['v4'] == 'b' }
426
+ exp1 = ["1 [1]: v4='b'", "3 [3]: v4='b'"]
427
+ exp2 = ["1 [r1]: v4='b'", "3 [r3]: v4='b'"]
428
+ res = ds.verify(t3, t1, t2)
429
+ assert_equal(exp1, res)
430
+ res = ds.verify('id', t1, t2, t3)
431
+ assert_equal(exp2, res)
415
432
  end
416
- def test_compute_operation
417
- v1=[1,2,3,4].to_vector(:scale)
418
- v2=[4,3,2,1].to_vector(:scale)
419
- v3=[10,20,30,40].to_vector(:scale)
420
- vscale=[1.quo(2),1,3.quo(2),2].to_vector(:scale)
421
- vsum=[1+4+10.0,2+3+20.0,3+2+30.0,4+1+40.0].to_vector(:scale)
422
- vmult=[1*4,2*3,3*2,4*1].to_vector(:scale)
423
- ds={'v1'=>v1,'v2'=>v2,'v3'=>v3}.to_dataset
424
- assert_equal(vscale,ds.compute("v1/2"))
425
- assert_equal(vsum,ds.compute("v1+v2+v3"))
426
- assert_equal(vmult,ds.compute("v1*v2"))
427
433
 
434
+ def test_compute_operation
435
+ v1 = [1, 2, 3, 4].to_vector(:scale)
436
+ v2 = [4, 3, 2, 1].to_vector(:scale)
437
+ v3 = [10, 20, 30, 40].to_vector(:scale)
438
+ vscale = [1.quo(2), 1, 3.quo(2), 2].to_vector(:scale)
439
+ vsum = [1 + 4 + 10.0, 2 + 3 + 20.0, 3 + 2 + 30.0, 4 + 1 + 40.0].to_vector(:scale)
440
+ vmult = [1 * 4, 2 * 3, 3 * 2, 4 * 1].to_vector(:scale)
441
+ ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3 }.to_dataset
442
+ assert_equal(vscale, ds.compute('v1/2'))
443
+ assert_equal(vsum, ds.compute('v1+v2+v3'))
444
+ assert_equal(vmult, ds.compute('v1*v2'))
428
445
  end
446
+
429
447
  def test_crosstab_with_asignation
430
- v1=%w{a a a b b b c c c}.to_vector
431
- v2=%w{a b c a b c a b c}.to_vector
432
- v3=%w{0 1 0 0 1 1 0 0 1}.to_scale
433
- ds=Statsample::Dataset.crosstab_by_asignation(v1,v2,v3)
448
+ v1 = %w(a a a b b b c c c).to_vector
449
+ v2 = %w(a b c a b c a b c).to_vector
450
+ v3 = %w(0 1 0 0 1 1 0 0 1).to_scale
451
+ ds = Statsample::Dataset.crosstab_by_asignation(v1, v2, v3)
434
452
  assert_equal(:nominal, ds['_id'].type)
435
453
  assert_equal(:scale, ds['a'].type)
436
454
  assert_equal(:scale, ds['b'].type)
437
- ev_id=%w{a b c}.to_vector
438
- ev_a =%w{0 0 0}.to_scale
439
- ev_b =%w{1 1 0}.to_scale
440
- ev_c =%w{0 1 1}.to_scale
441
- ds2={'_id'=>ev_id, 'a'=>ev_a, 'b'=>ev_b, 'c'=>ev_c}.to_dataset
455
+ ev_id = %w(a b c).to_vector
456
+ ev_a = %w(0 0 0).to_scale
457
+ ev_b = %w(1 1 0).to_scale
458
+ ev_c = %w(0 1 1).to_scale
459
+ ds2 = { '_id' => ev_id, 'a' => ev_a, 'b' => ev_b, 'c' => ev_c }.to_dataset
442
460
  assert_equal(ds, ds2)
443
461
  end
462
+
444
463
  def test_one_to_many
445
- cases=[
446
- ['1','george','red',10,'blue',20,nil,nil],
447
- ['2','fred','green',15,'orange',30,'white',20],
448
- ['3','alfred',nil,nil,nil,nil,nil,nil]
464
+ cases = [
465
+ ['1', 'george', 'red', 10, 'blue', 20, nil, nil],
466
+ ['2', 'fred', 'green', 15, 'orange', 30, 'white', 20],
467
+ ['3', 'alfred', nil, nil, nil, nil, nil, nil]
449
468
  ]
450
- ds=Statsample::Dataset.new(%w{id name car_color1 car_value1 car_color2 car_value2 car_color3 car_value3})
451
- cases.each {|c| ds.add_case_array c }
469
+ ds = Statsample::Dataset.new(%w(id name car_color1 car_value1 car_color2 car_value2 car_color3 car_value3))
470
+ cases.each { |c| ds.add_case_array c }
452
471
  ds.update_valid_data
453
- ids=%w{1 1 2 2 2}.to_vector
454
- colors=%w{red blue green orange white}.to_vector
455
- values=[10,20,15,30,20].to_vector
456
- col_ids=[1,2,1,2,3].to_scale
457
- ds_expected={'id'=>ids, '_col_id'=>col_ids, 'color'=>colors, 'value'=>values}.to_dataset(['id','_col_id', 'color','value'])
458
- assert_equal(ds_expected, ds.one_to_many(%w{id}, "car_%v%n"))
459
-
472
+ ids = %w(1 1 2 2 2).to_vector
473
+ colors = %w(red blue green orange white).to_vector
474
+ values = [10, 20, 15, 30, 20].to_vector
475
+ col_ids = [1, 2, 1, 2, 3].to_scale
476
+ ds_expected = { 'id' => ids, '_col_id' => col_ids, 'color' => colors, 'value' => values }.to_dataset(%w(id _col_id color value))
477
+ assert_equal(ds_expected, ds.one_to_many(%w(id), 'car_%v%n'))
460
478
  end
461
-
462
479
  end