statsample 1.4.1 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50):
  1. checksums.yaml +4 -4
  2. data/.travis.yml +4 -3
  3. data/History.txt +4 -0
  4. data/README.md +4 -0
  5. data/lib/statsample/converter/csv.rb +41 -54
  6. data/lib/statsample/converters.rb +18 -19
  7. data/lib/statsample/version.rb +1 -1
  8. data/test/fixtures/scientific_notation.csv +4 -0
  9. data/test/helpers_tests.rb +37 -38
  10. data/test/test_analysis.rb +96 -97
  11. data/test/test_anova_contrast.rb +22 -22
  12. data/test/test_anovaoneway.rb +12 -12
  13. data/test/test_anovatwoway.rb +16 -17
  14. data/test/test_anovatwowaywithdataset.rb +22 -24
  15. data/test/test_anovawithvectors.rb +67 -69
  16. data/test/test_awesome_print_bug.rb +9 -9
  17. data/test/test_bartlettsphericity.rb +13 -13
  18. data/test/test_bivariate.rb +122 -126
  19. data/test/test_codification.rb +51 -49
  20. data/test/test_crosstab.rb +44 -40
  21. data/test/test_csv.rb +52 -70
  22. data/test/test_dataset.rb +347 -330
  23. data/test/test_dominance_analysis.rb +22 -24
  24. data/test/test_factor.rb +163 -166
  25. data/test/test_factor_map.rb +25 -30
  26. data/test/test_factor_pa.rb +28 -28
  27. data/test/test_ggobi.rb +19 -18
  28. data/test/test_gsl.rb +13 -15
  29. data/test/test_histogram.rb +74 -77
  30. data/test/test_matrix.rb +29 -31
  31. data/test/test_multiset.rb +132 -126
  32. data/test/test_regression.rb +143 -149
  33. data/test/test_reliability.rb +149 -155
  34. data/test/test_reliability_icc.rb +100 -104
  35. data/test/test_reliability_skillscale.rb +38 -40
  36. data/test/test_resample.rb +14 -12
  37. data/test/test_rserve_extension.rb +33 -33
  38. data/test/test_srs.rb +5 -5
  39. data/test/test_statistics.rb +52 -50
  40. data/test/test_stest.rb +27 -28
  41. data/test/test_stratified.rb +10 -10
  42. data/test/test_test_f.rb +17 -17
  43. data/test/test_test_kolmogorovsmirnov.rb +21 -21
  44. data/test/test_test_t.rb +52 -52
  45. data/test/test_umannwhitney.rb +16 -16
  46. data/test/test_vector.rb +419 -410
  47. data/test/test_wilcoxonsignedrank.rb +60 -63
  48. data/test/test_xls.rb +41 -41
  49. metadata +55 -5
  50. data/web/Rakefile +0 -39
data/test/test_csv.rb CHANGED
@@ -1,81 +1,63 @@
1
- require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
2
- class StatsampleCSVTestCase < MiniTest::Unit::TestCase
1
+ require 'helpers_tests.rb'
2
+
3
+ class StatsampleCSVTestCase < Minitest::Test
3
4
  def setup
4
- @ds=Statsample::CSV.read(File.dirname(__FILE__)+"/fixtures/test_csv.csv")
5
+ @ds = Statsample::CSV.read('test/fixtures/test_csv.csv')
5
6
  end
7
+
6
8
  def test_read
7
- assert_equal(6,@ds.cases)
8
- assert_equal(%w{id name age city a1}, @ds.fields)
9
- id=[1,2,3,4,5,6].to_vector(:scale)
10
- name=["Alex","Claude","Peter","Franz","George","Fernand"].to_vector(:nominal)
11
- age=[20,23,25,27,5.5,nil].to_vector(:scale)
12
- city=["New York","London","London","Paris","Tome",nil].to_vector(:nominal)
13
- a1=["a,b","b,c","a",nil,"a,b,c",nil].to_vector(:nominal)
14
- ds_exp=Statsample::Dataset.new({'id'=>id,'name'=>name,'age'=>age,'city'=>city,'a1'=>a1}, %w{id name age city a1})
15
- ds_exp.fields.each{|f|
16
- assert_equal(ds_exp[f],@ds[f])
9
+ header = %w(id name age city a1)
10
+ data = {
11
+ 'id' => [1, 2, 3, 4, 5, 6].to_vector(:scale),
12
+ 'name' => %w(Alex Claude Peter Franz George Fernand).to_vector(:nominal),
13
+ 'age' => [20, 23, 25, 27, 5.5, nil].to_vector(:scale),
14
+ 'city' => ['New York', 'London', 'London', 'Paris', 'Tome', nil].to_vector(:nominal),
15
+ 'a1' => ['a,b', 'b,c', 'a', nil, 'a,b,c', nil].to_vector(:nominal)
17
16
  }
18
- assert_equal(ds_exp,@ds)
19
- end
17
+
18
+ ds_exp = Statsample::Dataset.new(data, header)
19
+
20
+ assert_equal(6, @ds.cases)
21
+ assert_equal(header, @ds.fields)
22
+
23
+ ds_exp.fields.each do |f|
24
+ assert_equal(ds_exp[f], @ds[f])
25
+ end
26
+
27
+ assert_equal(ds_exp, @ds)
28
+ end
29
+
20
30
  def test_nil
21
- assert_equal(nil,@ds['age'][5])
31
+ assert_equal(nil, @ds['age'][5])
22
32
  end
33
+
23
34
  def test_repeated
24
- ds=Statsample::CSV.read(File.dirname(__FILE__)+"/fixtures/repeated_fields.csv")
25
- assert_equal(%w{id name_1 age_1 city a1 name_2 age_2},ds.fields)
26
- age=[3,4,5,6,nil,8].to_vector(:scale)
27
- assert_equal(age,ds['age_2'])
28
- end
35
+ ds = Statsample::CSV.read('test/fixtures/repeated_fields.csv')
36
+ assert_equal(%w(id name_1 age_1 city a1 name_2 age_2), ds.fields)
37
+ age = [3, 4, 5, 6, nil, 8].to_vector(:scale)
38
+ assert_equal(age, ds['age_2'])
39
+ end
40
+
41
+ # Testing fix for SciRuby/statsample#19.
42
+ def test_accept_scientific_notation_as_float
43
+ ds = Statsample::CSV.read('test/fixtures/scientific_notation.csv')
44
+ assert_equal(%w(x y), ds.fields)
45
+ y = [9.629587310436753e+127, 1.9341543147883677e+129, 3.88485279048245e+130]
46
+ y.zip(ds['y']).each do |y_expected, y_ds|
47
+ assert_in_delta(y_expected, y_ds)
48
+ end
49
+
50
+ end
51
+
29
52
  def test_write
30
- filename=Tempfile.new("afile")
31
- # filename=Dir::tmpdir+"/test_write.csv"
53
+ filename = Tempfile.new('afile')
32
54
  Statsample::CSV.write(@ds, filename.path)
33
- ds2=Statsample::CSV.read(filename.path)
34
- i=0
35
- ds2.each_array{|row|
36
- assert_equal(@ds.case_as_array(i),row)
37
- i+=1
38
- }
39
- end
40
- end
41
- =begin
42
- class StatsampleCSVTestCase2 < MiniTest::Unit::TestCase
43
- def setup
44
- @ds=Statsample::CSV.read19(File.dirname(__FILE__)+"/fixtures/test_csv.csv")
45
- end
46
- def test_read
47
- assert_equal(6,@ds.cases)
48
- assert_equal(%w{id name age city a1}, @ds.fields)
49
- id=[1,2,3,4,5,6].to_vector(:scale)
50
- name=["Alex","Claude","Peter","Franz","George","Fernand"].to_vector(:nominal)
51
- age=[20,23,25,27,5.5,nil].to_vector(:scale)
52
- city=["New York","London","London","Paris","Tome",nil].to_vector(:nominal)
53
- a1=["a,b","b,c","a",nil,"a,b,c",nil].to_vector(:nominal)
54
- ds_exp=Statsample::Dataset.new({'id'=>id,'name'=>name,'age'=>age,'city'=>city,'a1'=>a1}, %w{id name age city a1})
55
- ds_exp.fields.each{|f|
56
- assert_equal(ds_exp[f],@ds[f])
57
- }
58
- assert_equal(ds_exp,@ds)
59
- end
60
- def test_nil
61
- assert_equal(nil,@ds['age'][5])
62
- end
63
- def test_repeated
64
- ds=Statsample::CSV.read19(File.dirname(__FILE__)+"/fixtures/repeated_fields.csv")
65
- assert_equal(%w{id name_1 age_1 city a1 name_2 age_2},ds.fields)
66
- age=[3,4,5,6,nil,8].to_vector(:scale)
67
- assert_equal(age,ds['age_2'])
68
- end
69
- def test_write
70
- filename=Tempfile.new("afile")
71
- # filename=Dir::tmpdir+"/test_write.csv"
72
- Statsample::CSV.write(@ds, filename.path)
73
- ds2=Statsample::CSV.read19(filename.path)
74
- i=0
75
- ds2.each_array{|row|
76
- assert_equal(@ds.case_as_array(i),row)
77
- i+=1
78
- }
55
+ ds2 = Statsample::CSV.read(filename.path)
56
+ i = 0
57
+
58
+ ds2.each_array do |row|
59
+ assert_equal(@ds.case_as_array(i), row)
60
+ i += 1
61
+ end
79
62
  end
80
63
  end
81
- =end
data/test/test_dataset.rb CHANGED
@@ -1,188 +1,199 @@
1
- require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
2
- class StatsampleDatasetTestCase < MiniTest::Unit::TestCase
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
+ class StatsampleDatasetTestCase < Minitest::Test
3
3
  def setup
4
- @ds=Statsample::Dataset.new({'id' => Statsample::Vector.new([1,2,3,4,5]), 'name'=>Statsample::Vector.new(%w{Alex Claude Peter Franz George}), 'age'=>Statsample::Vector.new([20,23,25,27,5]),
5
- 'city'=>Statsample::Vector.new(['New York','London','London','Paris','Tome']),
6
- 'a1'=>Statsample::Vector.new(['a,b','b,c','a',nil,'a,b,c'])}, ['id','name','age','city','a1'])
4
+ @ds = Statsample::Dataset.new({ 'id' => Statsample::Vector.new([1, 2, 3, 4, 5]), 'name' => Statsample::Vector.new(%w(Alex Claude Peter Franz George)), 'age' => Statsample::Vector.new([20, 23, 25, 27, 5]),
5
+ 'city' => Statsample::Vector.new(['New York', 'London', 'London', 'Paris', 'Tome']),
6
+ 'a1' => Statsample::Vector.new(['a,b', 'b,c', 'a', nil, 'a,b,c']) }, %w(id name age city a1))
7
7
  end
8
+
8
9
  def test_nest
9
- ds={
10
- 'a'=>%w{a a a b b b}.to_vector,
11
- 'b'=>%w{c c d d e e}.to_vector,
12
- 'c'=>%w{f g h i j k}.to_vector
10
+ ds = {
11
+ 'a' => %w(a a a b b b).to_vector,
12
+ 'b' => %w(c c d d e e).to_vector,
13
+ 'c' => %w(f g h i j k).to_vector
13
14
  }.to_dataset
14
- nest=ds.nest('a','b')
15
- assert_equal([{'c'=>'f'},{'c'=>'g'}], nest['a']['c'])
16
- assert_equal([{'c'=>'h'}], nest['a']['d'])
17
- assert_equal([{'c'=>'j'},{'c'=>'k'}], nest['b']['e'])
18
-
15
+ nest = ds.nest('a', 'b')
16
+ assert_equal([{ 'c' => 'f' }, { 'c' => 'g' }], nest['a']['c'])
17
+ assert_equal([{ 'c' => 'h' }], nest['a']['d'])
18
+ assert_equal([{ 'c' => 'j' }, { 'c' => 'k' }], nest['b']['e'])
19
19
  end
20
+
20
21
  def test_should_have_summary
21
- assert(@ds.summary.size>0)
22
+ assert(@ds.summary.size > 0)
22
23
  end
24
+
23
25
  def test_basic
24
- assert_equal(5,@ds.cases)
25
- assert_equal(%w{id name age city a1}, @ds.fields)
26
+ assert_equal(5, @ds.cases)
27
+ assert_equal(%w(id name age city a1), @ds.fields)
26
28
  end
29
+
27
30
  def test_saveload
28
- outfile=Tempfile.new("dataset.ds")
31
+ outfile = Tempfile.new('dataset.ds')
29
32
  @ds.save(outfile.path)
30
- a=Statsample.load(outfile.path)
31
- assert_equal(@ds,a)
33
+ a = Statsample.load(outfile.path)
34
+ assert_equal(@ds, a)
32
35
  end
36
+
33
37
  def test_gsl
34
38
  if Statsample.has_gsl?
35
- matrix=GSL::Matrix[[1,2],[3,4],[5,6]]
36
- ds=Statsample::Dataset.new('v1'=>[1,3,5].to_vector,'v2'=>[2,4,6].to_vector)
37
- assert_equal(matrix,ds.to_gsl)
39
+ matrix = GSL::Matrix[[1, 2], [3, 4], [5, 6]]
40
+ ds = Statsample::Dataset.new('v1' => [1, 3, 5].to_vector, 'v2' => [2, 4, 6].to_vector)
41
+ assert_equal(matrix, ds.to_gsl)
38
42
  else
39
- skip("Gsl needed")
43
+ skip('Gsl needed')
40
44
  end
41
45
  end
46
+
42
47
  def test_matrix
43
- matrix=Matrix[[1,2],[3,4],[5,6]]
44
- ds=Statsample::Dataset.new('v1'=>[1,3,5].to_vector,'v2'=>[2,4,6].to_vector)
45
- assert_equal(matrix,ds.to_matrix)
48
+ matrix = Matrix[[1, 2], [3, 4], [5, 6]]
49
+ ds = Statsample::Dataset.new('v1' => [1, 3, 5].to_vector, 'v2' => [2, 4, 6].to_vector)
50
+ assert_equal(matrix, ds.to_matrix)
46
51
  end
47
52
 
48
53
  def test_fields
49
- @ds.fields=%w{name a1 id age city}
50
- assert_equal(%w{name a1 id age city}, @ds.fields)
51
- @ds.fields=%w{id name age}
52
- assert_equal(%w{id name age a1 city}, @ds.fields)
54
+ @ds.fields = %w(name a1 id age city)
55
+ assert_equal(%w(name a1 id age city), @ds.fields)
56
+ @ds.fields = %w(id name age)
57
+ assert_equal(%w(id name age a1 city), @ds.fields)
53
58
  end
59
+
54
60
  def test_merge
55
- a=[1,2,3].to_scale
56
- b=[3,4,5].to_vector
57
- c=[4,5,6].to_scale
58
- d=[7,8,9].to_vector
59
- e=[10,20,30].to_vector
60
- ds1={'a'=>a,'b'=>b}.to_dataset
61
- ds2={'c'=>c,'d'=>d}.to_dataset
62
- exp={'a'=>a,'b'=>b,'c'=>c,'d'=>d}.to_dataset
63
-
64
- assert_equal(exp,ds1.merge(ds2))
65
- exp.fields=%w{c d a b}
66
- assert_equal(exp,ds2.merge(ds1))
67
- ds3={'a'=>e}.to_dataset
68
- exp={'a_1'=>a,'b'=>b,'a_2'=>e}.to_dataset
69
- exp.fields=%w{a_1 b a_2}
70
- assert_equal(exp,ds1.merge(ds3))
61
+ a = [1, 2, 3].to_scale
62
+ b = [3, 4, 5].to_vector
63
+ c = [4, 5, 6].to_scale
64
+ d = [7, 8, 9].to_vector
65
+ e = [10, 20, 30].to_vector
66
+ ds1 = { 'a' => a, 'b' => b }.to_dataset
67
+ ds2 = { 'c' => c, 'd' => d }.to_dataset
68
+ exp = { 'a' => a, 'b' => b, 'c' => c, 'd' => d }.to_dataset
69
+
70
+ assert_equal(exp, ds1.merge(ds2))
71
+ exp.fields = %w(c d a b)
72
+ assert_equal(exp, ds2.merge(ds1))
73
+ ds3 = { 'a' => e }.to_dataset
74
+ exp = { 'a_1' => a, 'b' => b, 'a_2' => e }.to_dataset
75
+ exp.fields = %w(a_1 b a_2)
76
+ assert_equal(exp, ds1.merge(ds3))
71
77
  end
78
+
72
79
  def test_each_vector
73
- a=[1,2,3].to_vector
74
- b=[3,4,5].to_vector
75
- fields=["a","b"]
76
- ds=Statsample::Dataset.new({'a'=>a,'b'=>b},fields)
77
- res=[]
78
- ds.each_vector{|k,v|
79
- res.push([k,v])
80
+ a = [1, 2, 3].to_vector
81
+ b = [3, 4, 5].to_vector
82
+ fields = %w(a b)
83
+ ds = Statsample::Dataset.new({ 'a' => a, 'b' => b }, fields)
84
+ res = []
85
+ ds.each_vector{|k, v|
86
+ res.push([k, v])
80
87
  }
81
- assert_equal([["a",a],["b",b]],res)
82
- ds.fields=["b","a"]
83
- res=[]
84
- ds.each_vector{|k,v|
85
- res.push([k,v])
88
+ assert_equal([['a', a], ['b', b]], res)
89
+ ds.fields = %w(b a)
90
+ res = []
91
+ ds.each_vector{|k, v|
92
+ res.push([k, v])
86
93
  }
87
- assert_equal([["b",b],["a",a]],res)
94
+ assert_equal([['b', b], ['a', a]], res)
88
95
  end
96
+
89
97
  def test_equality
90
- v1=[1,2,3,4].to_vector
91
- v2=[5,6,7,8].to_vector
92
- ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2}, %w{v2 v1})
93
- v3=[1,2,3,4].to_vector
94
- v4=[5,6,7,8].to_vector
95
- ds2=Statsample::Dataset.new({'v1'=>v3,'v2'=>v4}, %w{v2 v1})
96
- assert_equal(ds1,ds2)
97
- ds2.fields=%w{v1 v2}
98
- assert_not_equal(ds1,ds2)
98
+ v1 = [1, 2, 3, 4].to_vector
99
+ v2 = [5, 6, 7, 8].to_vector
100
+ ds1 = Statsample::Dataset.new({ 'v1' => v1, 'v2' => v2 }, %w(v2 v1))
101
+ v3 = [1, 2, 3, 4].to_vector
102
+ v4 = [5, 6, 7, 8].to_vector
103
+ ds2 = Statsample::Dataset.new({ 'v1' => v3, 'v2' => v4 }, %w(v2 v1))
104
+ assert_equal(ds1, ds2)
105
+ ds2.fields = %w(v1 v2)
106
+ assert_not_equal(ds1, ds2)
99
107
  end
108
+
100
109
  def test_add_vector
101
- v=Statsample::Vector.new(%w{a b c d e})
102
- @ds.add_vector('new',v)
103
- assert_equal(%w{id name age city a1 new},@ds.fields)
104
- x=Statsample::Vector.new(%w{a b c d e f g})
110
+ v = Statsample::Vector.new(%w(a b c d e))
111
+ @ds.add_vector('new', v)
112
+ assert_equal(%w(id name age city a1 new), @ds.fields)
113
+ x = Statsample::Vector.new(%w(a b c d e f g))
105
114
  assert_raise ArgumentError do
106
- @ds.add_vector('new2',x)
115
+ @ds.add_vector('new2', x)
107
116
  end
108
117
  end
118
+
109
119
  def test_vector_by_calculation
110
- a1=[1,2,3,4,5,6,7].to_vector(:scale)
111
- a2=[10,20,30,40,50,60,70].to_vector(:scale)
112
- a3=[100,200,300,400,500,600,700].to_vector(:scale)
113
- ds={'a1'=>a1,'a2'=>a2,'a3'=>a3}.to_dataset
114
- total=ds.vector_by_calculation() {|row|
115
- row['a1']+row['a2']+row['a3']
120
+ a1 = [1, 2, 3, 4, 5, 6, 7].to_vector(:scale)
121
+ a2 = [10, 20, 30, 40, 50, 60, 70].to_vector(:scale)
122
+ a3 = [100, 200, 300, 400, 500, 600, 700].to_vector(:scale)
123
+ ds = { 'a1' => a1, 'a2' => a2, 'a3' => a3 }.to_dataset
124
+ total = ds.vector_by_calculation {|row|
125
+ row['a1'] + row['a2'] + row['a3']
116
126
  }
117
- expected=[111,222,333,444,555,666,777].to_vector(:scale)
118
- assert_equal(expected,total)
127
+ expected = [111, 222, 333, 444, 555, 666, 777].to_vector(:scale)
128
+ assert_equal(expected, total)
119
129
  end
130
+
120
131
  def test_vector_sum
121
- a1=[1 ,2 ,3 ,4 , 5,nil].to_vector(:scale)
122
- a2=[10 ,10,20,20 ,20,30].to_vector(:scale)
123
- b1=[nil,1 ,1 ,1 ,1 ,2].to_vector(:scale)
124
- b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
125
- ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2}.to_dataset
126
- total=ds.vector_sum
127
- a=ds.vector_sum(['a1','a2'])
128
- b=ds.vector_sum(['b1','b2'])
129
- expected_a=[11,12,23,24,25,nil].to_vector(:scale)
130
- expected_b=[nil,3,3,nil,3,5].to_vector(:scale)
131
- expected_total=[nil,15,26,nil,28,nil].to_vector(:scale)
132
+ a1 = [1, 2, 3, 4, 5, nil].to_vector(:scale)
133
+ a2 = [10, 10, 20, 20, 20, 30].to_vector(:scale)
134
+ b1 = [nil, 1, 1, 1, 1, 2].to_vector(:scale)
135
+ b2 = [2, 2, 2, nil, 2, 3].to_vector(:scale)
136
+ ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2 }.to_dataset
137
+ total = ds.vector_sum
138
+ a = ds.vector_sum(%w(a1 a2))
139
+ b = ds.vector_sum(%w(b1 b2))
140
+ expected_a = [11, 12, 23, 24, 25, nil].to_vector(:scale)
141
+ expected_b = [nil, 3, 3, nil, 3, 5].to_vector(:scale)
142
+ expected_total = [nil, 15, 26, nil, 28, nil].to_vector(:scale)
132
143
  assert_equal(expected_a, a)
133
144
  assert_equal(expected_b, b)
134
145
  assert_equal(expected_total, total)
135
146
  end
147
+
136
148
  def test_vector_missing_values
137
- a1=[1 ,nil ,3 ,4 , 5,nil].to_vector(:scale)
138
- a2=[10 ,nil ,20,20 ,20,30].to_vector(:scale)
139
- b1=[nil,nil ,1 ,1 ,1 ,2].to_vector(:scale)
140
- b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
141
- c= [nil,2 , 4,2 ,2 ,2].to_vector(:scale)
142
- ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
143
- mva=[2,3,0,1,0,1].to_vector(:scale)
144
- assert_equal(mva,ds.vector_missing_values)
145
- end
146
-
149
+ a1 = [1, nil, 3, 4, 5, nil].to_vector(:scale)
150
+ a2 = [10, nil, 20, 20, 20, 30].to_vector(:scale)
151
+ b1 = [nil, nil, 1, 1, 1, 2].to_vector(:scale)
152
+ b2 = [2, 2, 2, nil, 2, 3].to_vector(:scale)
153
+ c = [nil, 2, 4, 2, 2, 2].to_vector(:scale)
154
+ ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
155
+ mva = [2, 3, 0, 1, 0, 1].to_vector(:scale)
156
+ assert_equal(mva, ds.vector_missing_values)
157
+ end
158
+
147
159
  def test_has_missing_values
148
- a1=[1 ,nil ,3 ,4 , 5,nil].to_vector(:scale)
149
- a2=[10 ,nil ,20,20 ,20,30].to_vector(:scale)
150
- b1=[nil,nil ,1 ,1 ,1 ,2].to_vector(:scale)
151
- b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
152
- c= [nil,2 , 4,2 ,2 ,2].to_vector(:scale)
153
- ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
160
+ a1 = [1, nil, 3, 4, 5, nil].to_vector(:scale)
161
+ a2 = [10, nil, 20, 20, 20, 30].to_vector(:scale)
162
+ b1 = [nil, nil, 1, 1, 1, 2].to_vector(:scale)
163
+ b2 = [2, 2, 2, nil, 2, 3].to_vector(:scale)
164
+ c = [nil, 2, 4, 2, 2, 2].to_vector(:scale)
165
+ ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
154
166
  assert(ds.has_missing_data?)
155
- clean=ds.dup_only_valid
167
+ clean = ds.dup_only_valid
156
168
  assert(!clean.has_missing_data?)
157
169
  end
158
-
159
-
160
- def test_vector_count_characters
161
- a1=[1 ,"abcde" ,3 ,4 , 5,nil].to_vector(:scale)
162
- a2=[10 ,20.3 ,20 ,20 ,20,30].to_vector(:scale)
163
- b1=[nil,"343434" ,1 ,1 ,1 ,2].to_vector(:scale)
164
- b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
165
- c= [nil,2 ,"This is a nice example",2 ,2 ,2].to_vector(:scale)
166
- ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
167
- exp=[4,17,27,5,6,5].to_vector(:scale)
168
- assert_equal(exp,ds.vector_count_characters)
169
170
 
171
+ def test_vector_count_characters
172
+ a1 = [1, 'abcde', 3, 4, 5, nil].to_vector(:scale)
173
+ a2 = [10, 20.3, 20, 20, 20, 30].to_vector(:scale)
174
+ b1 = [nil, '343434', 1, 1, 1, 2].to_vector(:scale)
175
+ b2 = [2, 2, 2, nil, 2, 3].to_vector(:scale)
176
+ c = [nil, 2, 'This is a nice example', 2, 2, 2].to_vector(:scale)
177
+ ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
178
+ exp = [4, 17, 27, 5, 6, 5].to_vector(:scale)
179
+ assert_equal(exp, ds.vector_count_characters)
170
180
  end
181
+
171
182
  def test_vector_mean
172
- a1=[1 ,2 ,3 ,4 , 5,nil].to_vector(:scale)
173
- a2=[10 ,10,20,20 ,20,30].to_vector(:scale)
174
- b1=[nil,1 ,1 ,1 ,1 ,2].to_vector(:scale)
175
- b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
176
- c= [nil,2, 4,2 ,2 ,2].to_vector(:scale)
177
- ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
178
- total=ds.vector_mean
179
- a=ds.vector_mean(['a1','a2'],1)
180
- b=ds.vector_mean(['b1','b2'],1)
181
- c=ds.vector_mean(['b1','b2','c'],1)
182
- expected_a=[5.5,6,11.5,12,12.5,30].to_vector(:scale)
183
- expected_b=[2,1.5,1.5,1,1.5,2.5].to_vector(:scale)
184
- expected_c=[nil, 5.0/3,7.0/3,1.5,5.0/3,7.0/3].to_vector(:scale)
185
- expected_total=[nil,3.4,6,nil,6.0,nil].to_vector(:scale)
183
+ a1 = [1, 2, 3, 4, 5, nil].to_vector(:scale)
184
+ a2 = [10, 10, 20, 20, 20, 30].to_vector(:scale)
185
+ b1 = [nil, 1, 1, 1, 1, 2].to_vector(:scale)
186
+ b2 = [2, 2, 2, nil, 2, 3].to_vector(:scale)
187
+ c = [nil, 2, 4, 2, 2, 2].to_vector(:scale)
188
+ ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
189
+ total = ds.vector_mean
190
+ a = ds.vector_mean(%w(a1 a2), 1)
191
+ b = ds.vector_mean(%w(b1 b2), 1)
192
+ c = ds.vector_mean(%w(b1 b2 c), 1)
193
+ expected_a = [5.5, 6, 11.5, 12, 12.5, 30].to_vector(:scale)
194
+ expected_b = [2, 1.5, 1.5, 1, 1.5, 2.5].to_vector(:scale)
195
+ expected_c = [nil, 5.0 / 3, 7.0 / 3, 1.5, 5.0 / 3, 7.0 / 3].to_vector(:scale)
196
+ expected_total = [nil, 3.4, 6, nil, 6.0, nil].to_vector(:scale)
186
197
  assert_equal(expected_a, a)
187
198
  assert_equal(expected_b, b)
188
199
  assert_equal(expected_c, c)
@@ -190,273 +201,279 @@ class StatsampleDatasetTestCase < MiniTest::Unit::TestCase
190
201
  end
191
202
 
192
203
  def test_each_array
193
- expected=[[1,'Alex',20,'New York','a,b'], [2,'Claude',23,'London','b,c'], [3,'Peter',25,'London','a'],[4,'Franz', 27,'Paris',nil],[5,'George',5,'Tome','a,b,c']]
194
- out=[]
204
+ expected = [[1, 'Alex', 20, 'New York', 'a,b'], [2, 'Claude', 23, 'London', 'b,c'], [3, 'Peter', 25, 'London', 'a'], [4, 'Franz', 27, 'Paris', nil], [5, 'George', 5, 'Tome', 'a,b,c']]
205
+ out = []
195
206
  @ds.each_array{ |a|
196
207
  out.push(a)
197
208
  }
198
- assert_equal(expected,out)
209
+ assert_equal(expected, out)
199
210
  end
211
+
200
212
  def test_recode
201
- @ds['age'].type=:scale
202
- @ds.recode!("age") {|c| c['id']*2}
203
- expected=[2,4,6,8,10].to_vector(:scale)
204
- assert_equal(expected,@ds['age'])
213
+ @ds['age'].type = :scale
214
+ @ds.recode!('age') { |c| c['id'] * 2 }
215
+ expected = [2, 4, 6, 8, 10].to_vector(:scale)
216
+ assert_equal(expected, @ds['age'])
205
217
  end
218
+
206
219
  def test_case_as
207
- assert_equal({'id'=>1,'name'=>'Alex','city'=>'New York','age'=>20,'a1'=>'a,b'},@ds.case_as_hash(0))
208
- assert_equal([5,'George',5,'Tome','a,b,c'],@ds.case_as_array(4))
220
+ assert_equal({ 'id' => 1, 'name' => 'Alex', 'city' => 'New York', 'age' => 20, 'a1' => 'a,b' }, @ds.case_as_hash(0))
221
+ assert_equal([5, 'George', 5, 'Tome', 'a,b,c'], @ds.case_as_array(4))
209
222
  # Native methods
210
- assert_equal({'id'=>1,'name'=>'Alex','city'=>'New York','age'=>20,'a1'=>'a,b'},@ds._case_as_hash(0))
211
- assert_equal([5,'George',5,'Tome','a,b,c'],@ds._case_as_array(4))
212
-
213
-
214
-
223
+ assert_equal({ 'id' => 1, 'name' => 'Alex', 'city' => 'New York', 'age' => 20, 'a1' => 'a,b' }, @ds._case_as_hash(0))
224
+ assert_equal([5, 'George', 5, 'Tome', 'a,b,c'], @ds._case_as_array(4))
215
225
  end
226
+
216
227
  def test_delete_vector
217
228
  @ds.delete_vector('name')
218
- assert_equal(%w{id age city a1},@ds.fields)
219
- assert_equal(%w{a1 age city id},@ds.vectors.keys.sort)
229
+ assert_equal(%w(id age city a1), @ds.fields)
230
+ assert_equal(%w(a1 age city id), @ds.vectors.keys.sort)
220
231
  end
232
+
221
233
  def test_change_type
222
- @ds.col('age').type=:scale
223
- assert_equal(:scale,@ds.col('age').type)
234
+ @ds.col('age').type = :scale
235
+ assert_equal(:scale, @ds.col('age').type)
224
236
  end
237
+
225
238
  def test_split_by_separator_recode
226
- @ds.add_vectors_by_split_recode("a1","_")
227
- assert_equal(%w{id name age city a1 a1_1 a1_2 a1_3},@ds.fields)
228
- assert_equal([1,0,1,nil,1],@ds.col('a1_1').to_a)
229
- assert_equal([1,1,0,nil,1],@ds.col('a1_2').to_a)
230
- assert_equal([0,1,0,nil,1],@ds.col('a1_3').to_a)
231
- {'a1_1'=>'a1:a', 'a1_2'=>'a1:b', 'a1_3'=>'a1:c'}.each do |k,v|
239
+ @ds.add_vectors_by_split_recode('a1', '_')
240
+ assert_equal(%w(id name age city a1 a1_1 a1_2 a1_3), @ds.fields)
241
+ assert_equal([1, 0, 1, nil, 1], @ds.col('a1_1').to_a)
242
+ assert_equal([1, 1, 0, nil, 1], @ds.col('a1_2').to_a)
243
+ assert_equal([0, 1, 0, nil, 1], @ds.col('a1_3').to_a)
244
+ { 'a1_1' => 'a1:a', 'a1_2' => 'a1:b', 'a1_3' => 'a1:c' }.each do |k, v|
232
245
  assert_equal(v, @ds[k].name)
233
246
  end
234
247
  end
248
+
235
249
  def test_split_by_separator
236
- @ds.add_vectors_by_split("a1","_")
237
- assert_equal(%w{id name age city a1 a1_a a1_b a1_c},@ds.fields)
238
- assert_equal([1,0,1,nil,1],@ds.col('a1_a').to_a)
239
- assert_equal([1,1,0,nil,1],@ds.col('a1_b').to_a)
240
- assert_equal([0,1,0,nil,1],@ds.col('a1_c').to_a)
250
+ @ds.add_vectors_by_split('a1', '_')
251
+ assert_equal(%w(id name age city a1 a1_a a1_b a1_c), @ds.fields)
252
+ assert_equal([1, 0, 1, nil, 1], @ds.col('a1_a').to_a)
253
+ assert_equal([1, 1, 0, nil, 1], @ds.col('a1_b').to_a)
254
+ assert_equal([0, 1, 0, nil, 1], @ds.col('a1_c').to_a)
241
255
  end
256
+
242
257
  def test_percentiles
243
- v1=(1..100).to_a.to_scale
244
- assert_equal(50.5,v1.median)
258
+ v1 = (1..100).to_a.to_scale
259
+ assert_equal(50.5, v1.median)
245
260
  assert_equal(25.5, v1.percentil(25))
246
- v2=(1..99).to_a.to_scale
247
- assert_equal(50,v2.median)
248
- assert_equal(25,v2.percentil(25))
249
- v3=(1..50).to_a.to_scale
261
+ v2 = (1..99).to_a.to_scale
262
+ assert_equal(50, v2.median)
263
+ assert_equal(25, v2.percentil(25))
264
+ v3 = (1..50).to_a.to_scale
250
265
  assert_equal(25.5, v3.median)
251
266
  assert_equal(13, v3.percentil(25))
252
-
253
267
  end
268
+
254
269
  def test_add_case
255
- ds=Statsample::Dataset.new({'a'=>[].to_vector, 'b'=>[].to_vector, 'c'=>[].to_vector})
256
- ds.add_case([1,2,3])
257
- ds.add_case({'a'=>4,'b'=>5,'c'=>6})
258
- ds.add_case([[7,8,9],%w{a b c}])
259
- assert_equal({'a'=>1,'b'=>2,'c'=>3},ds.case_as_hash(0))
260
- assert_equal([4,5,6],ds.case_as_array(1))
261
- assert_equal([7,8,9],ds.case_as_array(2))
262
- assert_equal(['a','b','c'],ds.case_as_array(3))
263
- ds.add_case_array([6,7,1])
270
+ ds = Statsample::Dataset.new('a' => [].to_vector, 'b' => [].to_vector, 'c' => [].to_vector)
271
+ ds.add_case([1, 2, 3])
272
+ ds.add_case('a' => 4, 'b' => 5, 'c' => 6)
273
+ ds.add_case([[7, 8, 9], %w(a b c)])
274
+ assert_equal({ 'a' => 1, 'b' => 2, 'c' => 3 }, ds.case_as_hash(0))
275
+ assert_equal([4, 5, 6], ds.case_as_array(1))
276
+ assert_equal([7, 8, 9], ds.case_as_array(2))
277
+ assert_equal(%w(a b c), ds.case_as_array(3))
278
+ ds.add_case_array([6, 7, 1])
264
279
  ds.update_valid_data
265
- assert_equal([6,7,1],ds.case_as_array(4))
266
-
280
+ assert_equal([6, 7, 1], ds.case_as_array(4))
267
281
  end
282
+
268
283
  def test_marshaling
269
- ds_marshal=Marshal.load(Marshal.dump(@ds))
270
- assert_equal(ds_marshal,@ds)
284
+ ds_marshal = Marshal.load(Marshal.dump(@ds))
285
+ assert_equal(ds_marshal, @ds)
271
286
  end
272
- def test_range
273
- v1=[1,2,3,4].to_vector
274
- v2=[5,6,7,8].to_vector
275
- v3=[9,10,11,12].to_vector
276
- ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2,'v3'=>v3}, %w{v3 v2 v1})
277
- assert_same(v1,ds1['v1'])
278
- ds2=ds1["v2".."v1"]
279
- assert_equal(%w{v2 v1},ds2.fields)
280
- assert_same(ds1['v1'],ds2['v1'])
281
- assert_same(ds1['v2'],ds2['v2'])
282
-
283
287
 
288
+ def test_range
289
+ v1 = [1, 2, 3, 4].to_vector
290
+ v2 = [5, 6, 7, 8].to_vector
291
+ v3 = [9, 10, 11, 12].to_vector
292
+ ds1 = Statsample::Dataset.new({ 'v1' => v1, 'v2' => v2, 'v3' => v3 }, %w(v3 v2 v1))
293
+ assert_same(v1, ds1['v1'])
294
+ ds2 = ds1['v2'..'v1']
295
+ assert_equal(%w(v2 v1), ds2.fields)
296
+ assert_same(ds1['v1'], ds2['v1'])
297
+ assert_same(ds1['v2'], ds2['v2'])
284
298
  end
299
+
285
300
  def test_clone
286
- v1=[1,2,3,4].to_vector
287
- v2=[5,6,7,8].to_vector
288
- ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2}, %w{v2 v1})
289
- ds2=ds1.clone
290
- assert_equal(ds1,ds2)
291
- assert_not_same(ds1,ds2)
292
- assert_equal(ds1['v1'],ds2['v1'])
301
+ v1 = [1, 2, 3, 4].to_vector
302
+ v2 = [5, 6, 7, 8].to_vector
303
+ ds1 = Statsample::Dataset.new({ 'v1' => v1, 'v2' => v2 }, %w(v2 v1))
304
+ ds2 = ds1.clone
305
+ assert_equal(ds1, ds2)
306
+ assert_not_same(ds1, ds2)
307
+ assert_equal(ds1['v1'], ds2['v1'])
293
308
  assert_same(ds1['v1'], ds2['v1'])
294
- assert_equal(ds1.fields,ds2.fields)
295
- assert_not_same(ds1.fields,ds2.fields)
296
- assert_equal(ds1.cases,ds2.cases)
309
+ assert_equal(ds1.fields, ds2.fields)
310
+ assert_not_same(ds1.fields, ds2.fields)
311
+ assert_equal(ds1.cases, ds2.cases)
297
312
 
298
313
  # partial clone
299
- ds3=ds1.clone('v1')
300
- ds_exp=Statsample::Dataset.new({'v1'=>v1},%w{v1})
301
- assert_equal(ds_exp,ds3)
302
- assert_not_same(ds_exp,ds3)
303
- assert_equal(ds3['v1'],ds_exp['v1'])
304
- assert_same(ds3['v1'],ds_exp['v1'])
305
- assert_equal(ds3.fields,ds_exp.fields)
306
- assert_equal(ds3.cases,ds_exp.cases)
307
-
308
- assert_not_same(ds3.fields,ds_exp.fields)
309
-
314
+ ds3 = ds1.clone('v1')
315
+ ds_exp = Statsample::Dataset.new({ 'v1' => v1 }, %w(v1))
316
+ assert_equal(ds_exp, ds3)
317
+ assert_not_same(ds_exp, ds3)
318
+ assert_equal(ds3['v1'], ds_exp['v1'])
319
+ assert_same(ds3['v1'], ds_exp['v1'])
320
+ assert_equal(ds3.fields, ds_exp.fields)
321
+ assert_equal(ds3.cases, ds_exp.cases)
322
+
323
+ assert_not_same(ds3.fields, ds_exp.fields)
310
324
  end
325
+
311
326
  def test_dup
312
- v1=[1,2,3,4].to_vector
313
- v2=[5,6,7,8].to_vector
314
- ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2}, %w{v2 v1})
315
- ds2=ds1.dup
316
- assert_equal(ds1,ds2)
317
- assert_not_same(ds1,ds2)
318
- assert_equal(ds1['v1'],ds2['v1'])
319
- assert_not_same(ds1['v1'],ds2['v1'])
320
- assert_equal(ds1.cases,ds2.cases)
321
-
322
- assert_equal(ds1.fields,ds2.fields)
323
- assert_not_same(ds1.fields,ds2.fields)
324
- ds1['v1'].type=:scale
325
- # dup partial
326
- ds3=ds1.dup('v1')
327
- ds_exp=Statsample::Dataset.new({'v1'=>v1},%w{v1})
328
- assert_equal(ds_exp,ds3)
329
- assert_not_same(ds_exp,ds3)
330
- assert_equal(ds3['v1'],ds_exp['v1'])
331
- assert_not_same(ds3['v1'],ds_exp['v1'])
332
- assert_equal(ds3.fields,ds_exp.fields)
333
- assert_equal(ds3.cases,ds_exp.cases)
327
+ v1 = [1, 2, 3, 4].to_vector
328
+ v2 = [5, 6, 7, 8].to_vector
329
+ ds1 = Statsample::Dataset.new({ 'v1' => v1, 'v2' => v2 }, %w(v2 v1))
330
+ ds2 = ds1.dup
331
+ assert_equal(ds1, ds2)
332
+ assert_not_same(ds1, ds2)
333
+ assert_equal(ds1['v1'], ds2['v1'])
334
+ assert_not_same(ds1['v1'], ds2['v1'])
335
+ assert_equal(ds1.cases, ds2.cases)
334
336
 
335
- assert_not_same(ds3.fields,ds_exp.fields)
337
+ assert_equal(ds1.fields, ds2.fields)
338
+ assert_not_same(ds1.fields, ds2.fields)
339
+ ds1['v1'].type = :scale
340
+ # dup partial
341
+ ds3 = ds1.dup('v1')
342
+ ds_exp = Statsample::Dataset.new({ 'v1' => v1 }, %w(v1))
343
+ assert_equal(ds_exp, ds3)
344
+ assert_not_same(ds_exp, ds3)
345
+ assert_equal(ds3['v1'], ds_exp['v1'])
346
+ assert_not_same(ds3['v1'], ds_exp['v1'])
347
+ assert_equal(ds3.fields, ds_exp.fields)
348
+ assert_equal(ds3.cases, ds_exp.cases)
336
349
 
350
+ assert_not_same(ds3.fields, ds_exp.fields)
337
351
 
338
352
  # empty
339
- ds3=ds1.dup_empty
340
- assert_not_equal(ds1,ds3)
341
- assert_not_equal(ds1['v1'],ds3['v1'])
342
- assert_equal([],ds3['v1'].data)
343
- assert_equal([],ds3['v2'].data)
344
- assert_equal(:scale,ds3['v1'].type)
345
- assert_equal(ds1.fields,ds2.fields)
346
- assert_not_same(ds1.fields,ds2.fields)
353
+ ds3 = ds1.dup_empty
354
+ assert_not_equal(ds1, ds3)
355
+ assert_not_equal(ds1['v1'], ds3['v1'])
356
+ assert_equal([], ds3['v1'].data)
357
+ assert_equal([], ds3['v2'].data)
358
+ assert_equal(:scale, ds3['v1'].type)
359
+ assert_equal(ds1.fields, ds2.fields)
360
+ assert_not_same(ds1.fields, ds2.fields)
347
361
  end
362
+
348
363
  def test_from_to
349
- assert_equal(%w{name age city}, @ds.from_to("name","city"))
364
+ assert_equal(%w(name age city), @ds.from_to('name', 'city'))
350
365
  assert_raise ArgumentError do
351
- @ds.from_to("name","a2")
366
+ @ds.from_to('name', 'a2')
352
367
  end
353
368
  end
369
+
354
370
  def test_each_array_with_nils
355
- v1=[1,-99,3,4,"na"].to_vector(:scale,:missing_values=>[-99,"na"])
356
- v2=[5,6,-99,8,20].to_vector(:scale,:missing_values=>[-99])
357
- v3=[9,10,11,12,20].to_vector(:scale,:missing_values=>[-99])
358
- ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2,'v3'=>v3})
359
- ds2=ds1.dup_empty
371
+ v1 = [1, -99, 3, 4, 'na'].to_vector(:scale, missing_values: [-99, 'na'])
372
+ v2 = [5, 6, -99, 8, 20].to_vector(:scale, missing_values: [-99])
373
+ v3 = [9, 10, 11, 12, 20].to_vector(:scale, missing_values: [-99])
374
+ ds1 = Statsample::Dataset.new('v1' => v1, 'v2' => v2, 'v3' => v3)
375
+ ds2 = ds1.dup_empty
360
376
  ds1.each_array_with_nils {|row|
361
377
  ds2.add_case_array(row)
362
378
  }
363
379
  ds2.update_valid_data
364
- assert_equal([1,nil,3,4,nil],ds2['v1'].data)
365
- assert_equal([5,6,nil,8,20],ds2['v2'].data)
380
+ assert_equal([1, nil, 3, 4, nil], ds2['v1'].data)
381
+ assert_equal([5, 6, nil, 8, 20], ds2['v2'].data)
366
382
  end
383
+
367
384
  def test_dup_only_valid
368
- v1=[1,nil,3,4].to_vector(:scale)
369
- v2=[5,6,nil,8].to_vector(:scale)
370
- v3=[9,10,11,12].to_vector(:scale)
371
- ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2,'v3'=>v3})
372
- ds2=ds1.dup_only_valid
373
- expected=Statsample::Dataset.new({'v1'=>[1,4].to_vector(:scale), 'v2'=> [5,8].to_vector(:scale), 'v3'=>[9, 12].to_vector(:scale)})
374
- assert_equal(expected,ds2)
375
- assert_equal(expected.vectors.values,Statsample::only_valid(v1,v2,v3))
376
- expected_partial=Statsample::Dataset.new({'v1'=>[1,3,4].to_vector(:scale), 'v3'=>[9, 11,12].to_vector(:scale)})
377
- assert_equal(expected_partial, ds1.dup_only_valid(%w{v1 v3}))
378
-
379
-
385
+ v1 = [1, nil, 3, 4].to_vector(:scale)
386
+ v2 = [5, 6, nil, 8].to_vector(:scale)
387
+ v3 = [9, 10, 11, 12].to_vector(:scale)
388
+ ds1 = Statsample::Dataset.new('v1' => v1, 'v2' => v2, 'v3' => v3)
389
+ ds2 = ds1.dup_only_valid
390
+ expected = Statsample::Dataset.new('v1' => [1, 4].to_vector(:scale), 'v2' => [5, 8].to_vector(:scale), 'v3' => [9, 12].to_vector(:scale))
391
+ assert_equal(expected, ds2)
392
+ assert_equal(expected.vectors.values, Statsample.only_valid(v1, v2, v3))
393
+ expected_partial = Statsample::Dataset.new('v1' => [1, 3, 4].to_vector(:scale), 'v3' => [9, 11, 12].to_vector(:scale))
394
+ assert_equal(expected_partial, ds1.dup_only_valid(%w(v1 v3)))
380
395
  end
396
+
381
397
  def test_filter
382
- @ds['age'].type=:scale
383
- filtered=@ds.filter{|c| c['id']==2 or c['id']==4}
384
- expected=Statsample::Dataset.new({'id' => Statsample::Vector.new([2,4]), 'name'=>Statsample::Vector.new(%w{Claude Franz}), 'age'=>Statsample::Vector.new([23,27],:scale),
385
- 'city'=>Statsample::Vector.new(['London','Paris']),
386
- 'a1'=>Statsample::Vector.new(['b,c',nil,])}, ['id','name','age','city','a1'])
387
- assert_equal(expected,filtered)
398
+ @ds['age'].type = :scale
399
+ filtered = @ds.filter { |c| c['id'] == 2 or c['id'] == 4 }
400
+ expected = Statsample::Dataset.new({ 'id' => Statsample::Vector.new([2, 4]), 'name' => Statsample::Vector.new(%w(Claude Franz)), 'age' => Statsample::Vector.new([23, 27], :scale),
401
+ 'city' => Statsample::Vector.new(%w(London Paris)),
402
+ 'a1' => Statsample::Vector.new(['b,c', nil]) }, %w(id name age city a1))
403
+ assert_equal(expected, filtered)
388
404
  end
389
- def test_filter_field
390
- @ds['age'].type=:scale
391
- filtered=@ds.filter_field('id') {|c| c['id']==2 or c['id']==4}
392
- expected=[2,4].to_vector
393
- assert_equal(expected,filtered)
394
405
 
406
+ def test_filter_field
407
+ @ds['age'].type = :scale
408
+ filtered = @ds.filter_field('id') { |c| c['id'] == 2 or c['id'] == 4 }
409
+ expected = [2, 4].to_vector
410
+ assert_equal(expected, filtered)
395
411
  end
412
+
396
413
  def test_verify
397
- name=%w{r1 r2 r3 r4}.to_vector(:nominal)
398
- v1=[1,2,3,4].to_vector(:scale)
399
- v2=[4,3,2,1].to_vector(:scale)
400
- v3=[10,20,30,40].to_vector(:scale)
401
- v4=%w{a b a b}.to_vector(:nominal)
402
- ds={'v1'=>v1,'v2'=>v2,'v3'=>v3,'v4'=>v4,'id'=>name}.to_dataset
403
- ds.fields=%w{v1 v2 v3 v4 id}
404
- #Correct
405
- t1=create_test("If v4=a, v1 odd") {|r| r['v4']=='b' or (r['v4']=='a' and r['v1']%2==1)}
406
- t2=create_test("v3=v1*10") {|r| r['v3']==r['v1']*10}
414
+ name = %w(r1 r2 r3 r4).to_vector(:nominal)
415
+ v1 = [1, 2, 3, 4].to_vector(:scale)
416
+ v2 = [4, 3, 2, 1].to_vector(:scale)
417
+ v3 = [10, 20, 30, 40].to_vector(:scale)
418
+ v4 = %w(a b a b).to_vector(:nominal)
419
+ ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4, 'id' => name }.to_dataset
420
+ ds.fields = %w(v1 v2 v3 v4 id)
421
+ # Correct
422
+ t1 = create_test('If v4=a, v1 odd') { |r| r['v4'] == 'b' or (r['v4'] == 'a' and r['v1'].odd?) }
423
+ t2 = create_test('v3=v1*10') { |r| r['v3'] == r['v1'] * 10 }
407
424
  # Fail!
408
- t3=create_test("v4='b'") {|r| r['v4']=='b'}
409
- exp1=["1 [1]: v4='b'", "3 [3]: v4='b'"]
410
- exp2=["1 [r1]: v4='b'", "3 [r3]: v4='b'"]
411
- res=ds.verify(t3,t1,t2)
412
- assert_equal(exp1,res)
413
- res=ds.verify('id',t1,t2,t3)
414
- assert_equal(exp2,res)
425
+ t3 = create_test("v4='b'") { |r| r['v4'] == 'b' }
426
+ exp1 = ["1 [1]: v4='b'", "3 [3]: v4='b'"]
427
+ exp2 = ["1 [r1]: v4='b'", "3 [r3]: v4='b'"]
428
+ res = ds.verify(t3, t1, t2)
429
+ assert_equal(exp1, res)
430
+ res = ds.verify('id', t1, t2, t3)
431
+ assert_equal(exp2, res)
415
432
  end
416
- def test_compute_operation
417
- v1=[1,2,3,4].to_vector(:scale)
418
- v2=[4,3,2,1].to_vector(:scale)
419
- v3=[10,20,30,40].to_vector(:scale)
420
- vscale=[1.quo(2),1,3.quo(2),2].to_vector(:scale)
421
- vsum=[1+4+10.0,2+3+20.0,3+2+30.0,4+1+40.0].to_vector(:scale)
422
- vmult=[1*4,2*3,3*2,4*1].to_vector(:scale)
423
- ds={'v1'=>v1,'v2'=>v2,'v3'=>v3}.to_dataset
424
- assert_equal(vscale,ds.compute("v1/2"))
425
- assert_equal(vsum,ds.compute("v1+v2+v3"))
426
- assert_equal(vmult,ds.compute("v1*v2"))
427
433
 
434
+ def test_compute_operation
435
+ v1 = [1, 2, 3, 4].to_vector(:scale)
436
+ v2 = [4, 3, 2, 1].to_vector(:scale)
437
+ v3 = [10, 20, 30, 40].to_vector(:scale)
438
+ vscale = [1.quo(2), 1, 3.quo(2), 2].to_vector(:scale)
439
+ vsum = [1 + 4 + 10.0, 2 + 3 + 20.0, 3 + 2 + 30.0, 4 + 1 + 40.0].to_vector(:scale)
440
+ vmult = [1 * 4, 2 * 3, 3 * 2, 4 * 1].to_vector(:scale)
441
+ ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3 }.to_dataset
442
+ assert_equal(vscale, ds.compute('v1/2'))
443
+ assert_equal(vsum, ds.compute('v1+v2+v3'))
444
+ assert_equal(vmult, ds.compute('v1*v2'))
428
445
  end
446
+
429
447
  def test_crosstab_with_asignation
430
- v1=%w{a a a b b b c c c}.to_vector
431
- v2=%w{a b c a b c a b c}.to_vector
432
- v3=%w{0 1 0 0 1 1 0 0 1}.to_scale
433
- ds=Statsample::Dataset.crosstab_by_asignation(v1,v2,v3)
448
+ v1 = %w(a a a b b b c c c).to_vector
449
+ v2 = %w(a b c a b c a b c).to_vector
450
+ v3 = %w(0 1 0 0 1 1 0 0 1).to_scale
451
+ ds = Statsample::Dataset.crosstab_by_asignation(v1, v2, v3)
434
452
  assert_equal(:nominal, ds['_id'].type)
435
453
  assert_equal(:scale, ds['a'].type)
436
454
  assert_equal(:scale, ds['b'].type)
437
- ev_id=%w{a b c}.to_vector
438
- ev_a =%w{0 0 0}.to_scale
439
- ev_b =%w{1 1 0}.to_scale
440
- ev_c =%w{0 1 1}.to_scale
441
- ds2={'_id'=>ev_id, 'a'=>ev_a, 'b'=>ev_b, 'c'=>ev_c}.to_dataset
455
+ ev_id = %w(a b c).to_vector
456
+ ev_a = %w(0 0 0).to_scale
457
+ ev_b = %w(1 1 0).to_scale
458
+ ev_c = %w(0 1 1).to_scale
459
+ ds2 = { '_id' => ev_id, 'a' => ev_a, 'b' => ev_b, 'c' => ev_c }.to_dataset
442
460
  assert_equal(ds, ds2)
443
461
  end
462
+
444
463
  def test_one_to_many
445
- cases=[
446
- ['1','george','red',10,'blue',20,nil,nil],
447
- ['2','fred','green',15,'orange',30,'white',20],
448
- ['3','alfred',nil,nil,nil,nil,nil,nil]
464
+ cases = [
465
+ ['1', 'george', 'red', 10, 'blue', 20, nil, nil],
466
+ ['2', 'fred', 'green', 15, 'orange', 30, 'white', 20],
467
+ ['3', 'alfred', nil, nil, nil, nil, nil, nil]
449
468
  ]
450
- ds=Statsample::Dataset.new(%w{id name car_color1 car_value1 car_color2 car_value2 car_color3 car_value3})
451
- cases.each {|c| ds.add_case_array c }
469
+ ds = Statsample::Dataset.new(%w(id name car_color1 car_value1 car_color2 car_value2 car_color3 car_value3))
470
+ cases.each { |c| ds.add_case_array c }
452
471
  ds.update_valid_data
453
- ids=%w{1 1 2 2 2}.to_vector
454
- colors=%w{red blue green orange white}.to_vector
455
- values=[10,20,15,30,20].to_vector
456
- col_ids=[1,2,1,2,3].to_scale
457
- ds_expected={'id'=>ids, '_col_id'=>col_ids, 'color'=>colors, 'value'=>values}.to_dataset(['id','_col_id', 'color','value'])
458
- assert_equal(ds_expected, ds.one_to_many(%w{id}, "car_%v%n"))
459
-
472
+ ids = %w(1 1 2 2 2).to_vector
473
+ colors = %w(red blue green orange white).to_vector
474
+ values = [10, 20, 15, 30, 20].to_vector
475
+ col_ids = [1, 2, 1, 2, 3].to_scale
476
+ ds_expected = { 'id' => ids, '_col_id' => col_ids, 'color' => colors, 'value' => values }.to_dataset(%w(id _col_id color value))
477
+ assert_equal(ds_expected, ds.one_to_many(%w(id), 'car_%v%n'))
460
478
  end
461
-
462
479
  end