statsample 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. data/History.txt +79 -0
  2. data/Manifest.txt +56 -0
  3. data/README.txt +77 -0
  4. data/Rakefile +22 -0
  5. data/bin/statsample +2 -0
  6. data/demo/benchmark.rb +52 -0
  7. data/demo/chi-square.rb +44 -0
  8. data/demo/dice.rb +13 -0
  9. data/demo/distribution_t.rb +95 -0
  10. data/demo/graph.rb +9 -0
  11. data/demo/item_analysis.rb +30 -0
  12. data/demo/mean.rb +81 -0
  13. data/demo/proportion.rb +57 -0
  14. data/demo/sample_test.csv +113 -0
  15. data/demo/strata_proportion.rb +152 -0
  16. data/demo/stratum.rb +141 -0
  17. data/lib/spss.rb +131 -0
  18. data/lib/statsample.rb +216 -0
  19. data/lib/statsample/anova.rb +74 -0
  20. data/lib/statsample/bivariate.rb +255 -0
  21. data/lib/statsample/chidistribution.rb +39 -0
  22. data/lib/statsample/codification.rb +120 -0
  23. data/lib/statsample/converters.rb +338 -0
  24. data/lib/statsample/crosstab.rb +122 -0
  25. data/lib/statsample/dataset.rb +526 -0
  26. data/lib/statsample/dominanceanalysis.rb +259 -0
  27. data/lib/statsample/dominanceanalysis/bootstrap.rb +126 -0
  28. data/lib/statsample/graph/gdchart.rb +45 -0
  29. data/lib/statsample/graph/svgboxplot.rb +108 -0
  30. data/lib/statsample/graph/svggraph.rb +181 -0
  31. data/lib/statsample/graph/svghistogram.rb +208 -0
  32. data/lib/statsample/graph/svgscatterplot.rb +111 -0
  33. data/lib/statsample/htmlreport.rb +232 -0
  34. data/lib/statsample/multiset.rb +281 -0
  35. data/lib/statsample/regression.rb +522 -0
  36. data/lib/statsample/reliability.rb +235 -0
  37. data/lib/statsample/resample.rb +20 -0
  38. data/lib/statsample/srs.rb +159 -0
  39. data/lib/statsample/test.rb +25 -0
  40. data/lib/statsample/vector.rb +759 -0
  41. data/test/_test_chart.rb +58 -0
  42. data/test/test_anova.rb +31 -0
  43. data/test/test_codification.rb +59 -0
  44. data/test/test_crosstab.rb +55 -0
  45. data/test/test_csv.csv +7 -0
  46. data/test/test_csv.rb +27 -0
  47. data/test/test_dataset.rb +293 -0
  48. data/test/test_ggobi.rb +42 -0
  49. data/test/test_multiset.rb +98 -0
  50. data/test/test_regression.rb +108 -0
  51. data/test/test_reliability.rb +32 -0
  52. data/test/test_resample.rb +23 -0
  53. data/test/test_srs.rb +14 -0
  54. data/test/test_statistics.rb +152 -0
  55. data/test/test_stratified.rb +19 -0
  56. data/test/test_svg_graph.rb +63 -0
  57. data/test/test_vector.rb +265 -0
  58. data/test/test_xls.rb +32 -0
  59. metadata +158 -0
@@ -0,0 +1,58 @@
1
+ require File.dirname(__FILE__)+'/../lib/statsample'
2
+ require 'tempfile'
3
+ require 'test/unit'
4
+ require 'statsample/chart/gdchart'
5
+ # Not included in the default test run, because GDChart emits a lot of warnings!
6
+ class StatsampleChartTestCase < Test::Unit::TestCase
7
+
8
+ def initialize(*args)
9
+ @image_path=File.dirname(__FILE__)+"/images"
10
+ super
11
+ end
12
+
13
+ def test_base_chart
14
+ file=@image_path+"/gdchart_base_bar_1.jpg"
15
+ width=500
16
+ height=300
17
+ chart_type=GDChart::BAR
18
+ labels=["a","b","c","d","e"]
19
+ options={'set_color'=>[0xFF3399]}
20
+ n_data=1
21
+ data=[10,40,30,20,40]
22
+
23
+ Statsample::Util.chart_gdchart(file,width,height,chart_type, labels, options,n_data,data)
24
+ assert(File.exists?(file))
25
+ %w{STACK_DEPTH STACK_SUM STACK_BESIDE STACK_LAYER}.each{|stack|
26
+ file=@image_path+"/gdchart_base_bar_2_#{stack}.jpg"
27
+ n_data=2
28
+ options={'set_color'=>[0xFF3399,0x33FF99,0xFF99FF,0xFF3399], 'stack_type'=>GDChart.const_get(stack.intern),'title'=>"Bar #{stack}"}
29
+
30
+ chart_type=GDChart::BAR
31
+
32
+ data=[10,15,10,20,30,30,20,5,15,20]
33
+ Statsample::Util.chart_gdchart(file,width,height,chart_type, labels, options,n_data,data)
34
+ assert(File.exists?(file))
35
+ }
36
+ end
37
+ def test_vector
38
+ file=@image_path+"/gdchart_bar.jpg"
39
+ ar=[]
40
+ (1..1000).each {|a|
41
+ ar.push(rand(10))
42
+ }
43
+ vector=ar.to_vector
44
+ file=@image_path+"/gdchart_bar.jpg"
45
+ vector.gdchart_frequencies(file,800,600,GDChart::BAR,'title'=>'Bar')
46
+ assert(File.exists?(file))
47
+ file=@image_path+"/gdchart_bar3d.jpg"
48
+ vector.gdchart_frequencies(file,300,100,GDChart::BAR3D,'title'=>'Bar3D')
49
+ assert(File.exists?(file))
50
+ file=@image_path+"/gdchart_floatingbar.jpg"
51
+ vector.gdchart_frequencies(file,200,200,GDChart::LINE,'title'=>'FloatingBar')
52
+ assert(File.exists?(file))
53
+ vector.type=:scale
54
+ file=@image_path+"/gdchart_histogram.jpg"
55
+ vector.gdchart_histogram(5,file,300,400,GDChart::BAR,'title'=>'Histogram')
56
+ assert(File.exists?(file))
57
+ end
58
+ end
@@ -0,0 +1,31 @@
1
+ require File.dirname(__FILE__)+'/../lib/statsample.rb'
2
+ require 'test/unit'
3
+
4
+ class StatsampleAnovaTestCase < Test::Unit::TestCase
5
+ def initialize(*args)
6
+ @v1=[3,3,2,3,6].to_vector(:scale)
7
+ @v2=[7,6,5,6,7].to_vector(:scale)
8
+ @v3=[9,8,9,7,8].to_vector(:scale)
9
+ @anova=Statsample::Anova::OneWay.new([@v1,@v2,@v3])
10
+ super
11
+ end
12
+ def test_basic
13
+ assert_in_delta(72.933, @anova.sst,0.001)
14
+ assert_in_delta(14.8,@anova.sswg,0.001)
15
+ assert_in_delta(58.133,@anova.ssbg,0.001)
16
+ assert_in_delta(@anova.sst,@anova.sswg+@anova.ssbg,0.00001)
17
+ assert_equal(14,@anova.df_total)
18
+ assert_equal(12,@anova.df_wg)
19
+ assert_equal(2,@anova.df_bg)
20
+ assert_in_delta(23.568,@anova.f,0.001)
21
+ anova2=Statsample::Anova::OneWay.new([@v1,@v1,@v1,@v1,@v2])
22
+ assert_in_delta(3.960, anova2.f,0.001)
23
+
24
+ if HAS_GSL
25
+ assert(@anova.significance<0.01)
26
+ assert_in_delta(0.016, anova2.significance,0.001)
27
+ else
28
+ puts "Skipped OneWay#significance (no GSL)"
29
+ end
30
+ end
31
+ end
@@ -0,0 +1,59 @@
1
+ require File.dirname(__FILE__)+'/../lib/statsample'
2
+ require 'tempfile'
3
+ require 'test/unit'
4
+
5
+ class StatsampleCodificationTestCase < Test::Unit::TestCase
6
+
7
+ def initialize(*args)
8
+ v1=%w{run walk,run walking running sleep sleeping,dreaming sleep,dream}.to_vector
9
+ @dict={'run'=>'r','walk'=>'w','walking'=>'w','running'=>'r','sleep'=>'s', 'sleeping'=>'s','dream'=>'d','dreaming'=>'d'}
10
+ @ds={"v1"=>v1}.to_dataset
11
+ super
12
+ end
13
+ def test_create_yaml
14
+ assert_raise ArgumentError do
15
+ Statsample::Codification.create_yaml(@ds,[])
16
+ end
17
+ expected_keys_v1=%w{run walk walking running sleep sleeping dream dreaming}.sort
18
+ yaml_hash=Statsample::Codification.create_yaml(@ds,['v1'])
19
+ h=YAML::load(yaml_hash)
20
+ assert_equal(['v1'],h.keys)
21
+ assert_equal(expected_keys_v1,h['v1'].keys.sort)
22
+ tf = Tempfile.new("test_codification")
23
+ yaml_hash=Statsample::Codification.create_yaml(@ds,['v1'],Statsample::SPLIT_TOKEN,tf)
24
+ tf.close
25
+ tf.open
26
+ h=YAML::load(tf)
27
+ assert_equal(['v1'],h.keys)
28
+ assert_equal(expected_keys_v1,h['v1'].keys.sort)
29
+ tf.close(true)
30
+ end
31
+ def test_recodification
32
+ expected=[['r'],['w','r'],['w'],['r'],['s'],['s','d'], ['s','d']]
33
+ assert_equal(expected,Statsample::Codification.recode_vector(@ds['v1'],@dict))
34
+ v2=['run','walk,dreaming',nil,'walk,dream,dreaming,walking'].to_vector
35
+ expected=[['r'],['w','d'],nil,['w','d']]
36
+ assert_equal(expected,Statsample::Codification.recode_vector(v2,@dict))
37
+ end
38
+ def test_recode_dataset_simple
39
+ yaml=YAML::dump({'v1'=>@dict})
40
+ Statsample::Codification.recode_dataset_simple!(@ds,yaml)
41
+ expected_vector=['r','w,r','w','r','s','s,d', 's,d'].to_vector
42
+ assert_not_equal(expected_vector,@ds['v1'])
43
+ assert_equal(expected_vector,@ds['v1_recoded'])
44
+ end
45
+ def test_recode_dataset_split
46
+ yaml=YAML::dump({'v1'=>@dict})
47
+ Statsample::Codification.recode_dataset_split!(@ds,yaml)
48
+ e={}
49
+ e['r']=[1,1,0,1,0,0,0].to_vector
50
+ e['w']=[0,1,1,0,0,0,0].to_vector
51
+ e['s']=[0,0,0,0,1,1,1].to_vector
52
+ e['d']=[0,0,0,0,0,1,1].to_vector
53
+ e.each{|k,expected|
54
+ assert_equal(expected,@ds['v1_'+k],"Error on key #{k}")
55
+
56
+ }
57
+ end
58
+
59
+ end
@@ -0,0 +1,55 @@
1
+ require File.dirname(__FILE__)+'/../lib/statsample'
2
+ require 'statsample/crosstab'
3
+ require 'test/unit'
4
+
5
+ class StatsampleCrosstabTestCase < Test::Unit::TestCase
6
+
7
+ def initialize(*args)
8
+ @v1=%w{black blonde black black red black brown black blonde black red black blonde}.to_vector
9
+ @v2=%w{woman man man woman man man man woman man woman woman man man}.to_vector
10
+ @ct=Statsample::Crosstab.new(@v1,@v2)
11
+ super
12
+ end
13
+ def test_crosstab_errors
14
+ e1=%w{black blonde black black red black brown black blonde black}
15
+ assert_raise ArgumentError do
16
+ Statsample::Crosstab.new(e1,@v2)
17
+ end
18
+ e2=%w{black blonde black black red black brown black blonde black black}.to_vector
19
+
20
+ assert_raise ArgumentError do
21
+ Statsample::Crosstab.new(e2,@v2)
22
+ end
23
+ assert_nothing_raised do
24
+ Statsample::Crosstab.new(@v1,@v2)
25
+ end
26
+ end
27
+ def test_crosstab_basic
28
+ assert_equal(%w{black blonde brown red}, @ct.rows_names)
29
+ assert_equal(%w{man woman}, @ct.cols_names)
30
+ assert_equal({'black'=>7,'blonde'=>3,'red'=>2,'brown'=>1}, @ct.rows_total)
31
+ assert_equal({'man'=>8,'woman'=>5}, @ct.cols_total)
32
+ end
33
+ def test_crosstab_frequencies
34
+ fq=@ct.frequencies
35
+ assert_equal(8,fq.size)
36
+ sum=fq.inject(0) {|s,x| s+x[1]}
37
+ assert_equal(13,sum)
38
+ fr=@ct.frequencies_by_row
39
+ assert_equal(4,fr.size)
40
+ assert_equal(%w{black blonde brown red},fr.keys.sort)
41
+ fc=@ct.frequencies_by_col
42
+ assert_equal(2,fc.size)
43
+ assert_equal(%w{man woman},fc.keys.sort)
44
+ assert_equal(Matrix.rows([[3,4],[3,0],[1,0],[1,1]]),@ct.to_matrix)
45
+ end
46
+ def test_expected
47
+ v1=%w{1 1 1 1 1 0 0 0 0 0}.to_vector
48
+ v2=%w{0 0 0 0 0 1 1 1 1 1}.to_vector
49
+ ct=Statsample::Crosstab.new(v1,v2)
50
+ assert_equal(Matrix[[2.5,2.5],[2.5,2.5]],ct.matrix_expected)
51
+ end
52
+ def test_to_s # Checks the crosstab's string rendering against a pattern mentioning the column names
53
+ assert_match(/man\s+|\s+woman/,@ct.to_s) # NOTE(review): the unescaped | is alternation, so "man"+whitespace OR whitespace+"woman" alone passes; confirm whether a literal \| separator was intended
54
+ end
55
+ end
data/test/test_csv.csv ADDED
@@ -0,0 +1,7 @@
1
+ "id","name","age","city","a1"
2
+ 1,"Alex",20,"New York","a,b"
3
+ 2,"Claude",23,"London","b,c"
4
+ 3,"Peter",25,"London","a"
5
+ 4,"Franz",27,"Paris",
6
+ 5,"George",5,"Tome","a,b,c"
7
+ 6,"Fernand",,,
data/test/test_csv.rb ADDED
@@ -0,0 +1,27 @@
1
+ require File.dirname(__FILE__)+'/../lib/statsample'
2
+ require 'tmpdir'
3
+ require 'test/unit'
4
+
5
+ class StatsampleCSVTestCase < Test::Unit::TestCase
6
+ def initialize(*args)
7
+ @ds=Statsample::CSV.read(File.dirname(__FILE__)+"/test_csv.csv")
8
+ super
9
+ end
10
+ def test_read
11
+ assert_equal(6,@ds.cases)
12
+ assert_equal(%w{id name age city a1},@ds.fields)
13
+ end
14
+ def test_nil
15
+ assert_equal(nil,@ds['age'][5])
16
+ end
17
+ def test_write
18
+ filename=Dir::tmpdir+"/test_write.csv"
19
+ Statsample::CSV.write(@ds,filename)
20
+ ds2=Statsample::CSV.read(filename)
21
+ i=0
22
+ ds2.each_array{|row|
23
+ assert_equal(@ds.case_as_array(i),row)
24
+ i+=1
25
+ }
26
+ end
27
+ end
@@ -0,0 +1,293 @@
1
+ require File.dirname(__FILE__)+'/../lib/statsample'
2
+ require 'test/unit'
3
+
4
+ class StatsampleDatasetTestCase < Test::Unit::TestCase
5
+ def initialize(*args)
6
+ @ds=Statsample::Dataset.new({'id' => Statsample::Vector.new([1,2,3,4,5]), 'name'=>Statsample::Vector.new(%w{Alex Claude Peter Franz George}), 'age'=>Statsample::Vector.new([20,23,25,27,5]),
7
+ 'city'=>Statsample::Vector.new(['New York','London','London','Paris','Tome']),
8
+ 'a1'=>Statsample::Vector.new(['a,b','b,c','a',nil,'a,b,c'])}, ['id','name','age','city','a1'])
9
+ super
10
+ end
11
+ def test_basic
12
+ assert_equal(5,@ds.cases)
13
+ assert_equal(%w{id name age city a1}, @ds.fields)
14
+ end
15
+ def test_matrix
16
+ matrix=Matrix[[1,2],[3,4],[5,6]]
17
+ ds=Statsample::Dataset.new('v1'=>[1,3,5].to_vector,'v2'=>[2,4,6].to_vector)
18
+ assert_equal(matrix,ds.to_matrix)
19
+ end
20
+
21
+ def test_fields
22
+ @ds.fields=%w{name a1 id age city}
23
+ assert_equal(%w{name a1 id age city}, @ds.fields)
24
+ @ds.fields=%w{id name age}
25
+ assert_equal(%w{id name age a1 city}, @ds.fields)
26
+ end
27
+ def test_each_vector
28
+ a=[1,2,3].to_vector
29
+ b=[3,4,5].to_vector
30
+ fields=["a","b"]
31
+ ds=Statsample::Dataset.new({'a'=>a,'b'=>b},fields)
32
+ res=[]
33
+ ds.each_vector{|k,v|
34
+ res.push([k,v])
35
+ }
36
+ assert_equal([["a",a],["b",b]],res)
37
+ ds.fields=["b","a"]
38
+ res=[]
39
+ ds.each_vector{|k,v|
40
+ res.push([k,v])
41
+ }
42
+ assert_equal([["b",b],["a",a]],res)
43
+ end
44
+ def test_equality
45
+ v1=[1,2,3,4].to_vector
46
+ v2=[5,6,7,8].to_vector
47
+ ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2}, %w{v2 v1})
48
+ v3=[1,2,3,4].to_vector
49
+ v4=[5,6,7,8].to_vector
50
+ ds2=Statsample::Dataset.new({'v1'=>v3,'v2'=>v4}, %w{v2 v1})
51
+ assert_equal(ds1,ds2)
52
+ ds2.fields=%w{v1 v2}
53
+ assert_not_equal(ds1,ds2)
54
+ end
55
+ def test_add_vector
56
+ v=Statsample::Vector.new(%w{a b c d e})
57
+ @ds.add_vector('new',v)
58
+ assert_equal(%w{id name age city a1 new},@ds.fields)
59
+ x=Statsample::Vector.new(%w{a b c d e f g})
60
+ assert_raise ArgumentError do
61
+ @ds.add_vector('new2',x)
62
+ end
63
+ end
64
+ def test_vector_by_calculation
65
+ a1=[1,2,3,4,5,6,7].to_vector(:scale)
66
+ a2=[10,20,30,40,50,60,70].to_vector(:scale)
67
+ a3=[100,200,300,400,500,600,700].to_vector(:scale)
68
+ ds={'a1'=>a1,'a2'=>a2,'a3'=>a3}.to_dataset
69
+ total=ds.vector_by_calculation() {|row|
70
+ row['a1']+row['a2']+row['a3']
71
+ }
72
+ expected=[111,222,333,444,555,666,777].to_vector(:scale)
73
+ assert_equal(expected,total)
74
+ end
75
+ def test_vector_sum
76
+ a1=[1 ,2 ,3 ,4 , 5,nil].to_vector(:scale)
77
+ a2=[10 ,10,20,20 ,20,30].to_vector(:scale)
78
+ b1=[nil,1 ,1 ,1 ,1 ,2].to_vector(:scale)
79
+ b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
80
+ ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2}.to_dataset
81
+ total=ds.vector_sum
82
+ a=ds.vector_sum(['a1','a2'])
83
+ b=ds.vector_sum(['b1','b2'])
84
+ expected_a=[11,12,23,24,25,nil].to_vector(:scale)
85
+ expected_b=[nil,3,3,nil,3,5].to_vector(:scale)
86
+ expected_total=[nil,15,26,nil,28,nil].to_vector(:scale)
87
+ assert_equal(expected_a, a)
88
+ assert_equal(expected_b, b)
89
+ assert_equal(expected_total, total)
90
+ end
91
+ def test_vector_missing_values
92
+ a1=[1 ,nil ,3 ,4 , 5,nil].to_vector(:scale)
93
+ a2=[10 ,nil ,20,20 ,20,30].to_vector(:scale)
94
+ b1=[nil,nil ,1 ,1 ,1 ,2].to_vector(:scale)
95
+ b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
96
+ c= [nil,2 , 4,2 ,2 ,2].to_vector(:scale)
97
+ ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
98
+ mva=[2,3,0,1,0,1].to_vector(:scale)
99
+ assert_equal(mva,ds.vector_missing_values)
100
+ end
101
+ def test_vector_count_characters
102
+ a1=[1 ,"abcde" ,3 ,4 , 5,nil].to_vector(:scale)
103
+ a2=[10 ,20.3 ,20 ,20 ,20,30].to_vector(:scale)
104
+ b1=[nil,"343434" ,1 ,1 ,1 ,2].to_vector(:scale)
105
+ b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
106
+ c= [nil,2 ,"This is a nice example",2 ,2 ,2].to_vector(:scale)
107
+ ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
108
+ exp=[4,17,27,5,6,5].to_vector(:scale)
109
+ assert_equal(exp,ds.vector_count_characters)
110
+
111
+ end
112
+ def test_vector_mean
113
+ a1=[1 ,2 ,3 ,4 , 5,nil].to_vector(:scale)
114
+ a2=[10 ,10,20,20 ,20,30].to_vector(:scale)
115
+ b1=[nil,1 ,1 ,1 ,1 ,2].to_vector(:scale)
116
+ b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
117
+ c= [nil,2, 4,2 ,2 ,2].to_vector(:scale)
118
+ ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
119
+ total=ds.vector_mean
120
+ a=ds.vector_mean(['a1','a2'],1)
121
+ b=ds.vector_mean(['b1','b2'],1)
122
+ c=ds.vector_mean(['b1','b2','c'],1)
123
+ expected_a=[5.5,6,11.5,12,12.5,30].to_vector(:scale)
124
+ expected_b=[2,1.5,1.5,1,1.5,2.5].to_vector(:scale)
125
+ expected_c=[nil, 5.0/3,7.0/3,1.5,5.0/3,7.0/3].to_vector(:scale)
126
+ expected_total=[nil,3.4,6,nil,6.0,nil].to_vector(:scale)
127
+ assert_equal(expected_a, a)
128
+ assert_equal(expected_b, b)
129
+ assert_equal(expected_c, c)
130
+ assert_equal(expected_total, total)
131
+ end
132
+
133
+ def test_each_array
134
+ expected=[[1,'Alex',20,'New York','a,b'], [2,'Claude',23,'London','b,c'], [3,'Peter',25,'London','a'],[4,'Franz', 27,'Paris',nil],[5,'George',5,'Tome','a,b,c']]
135
+ out=[]
136
+ @ds.each_array{ |a|
137
+ out.push(a)
138
+ }
139
+ assert_equal(expected,out)
140
+ end
141
+ def test_recode
142
+ @ds['age'].type=:scale
143
+ @ds.recode!("age") {|c| c['id']*2}
144
+ expected=[2,4,6,8,10].to_vector(:scale)
145
+ assert_equal(expected,@ds['age'])
146
+ end
147
+ def test_case_as
148
+ assert_equal({'id'=>1,'name'=>'Alex','city'=>'New York','age'=>20,'a1'=>'a,b'},@ds.case_as_hash(0))
149
+ assert_equal([5,'George',5,'Tome','a,b,c'],@ds.case_as_array(4))
150
+
151
+ end
152
+ def test_delete_vector
153
+ @ds.delete_vector('name')
154
+ assert_equal(%w{id age city a1},@ds.fields)
155
+ assert_equal(%w{a1 age city id},@ds.vectors.keys.sort)
156
+ end
157
+ def test_change_type
158
+ @ds.col('age').type=:scale
159
+ assert_equal(:scale,@ds.col('age').type)
160
+ end
161
+ def test_split_by_separator_recode
162
+ @ds.add_vectors_by_split_recode("a1","_")
163
+ assert_equal(%w{id name age city a1 a1_1 a1_2 a1_3},@ds.fields)
164
+ assert_equal([1,0,1,nil,1],@ds.col('a1_1').to_a)
165
+ assert_equal([1,1,0,nil,1],@ds.col('a1_2').to_a)
166
+ assert_equal([0,1,0,nil,1],@ds.col('a1_3').to_a)
167
+ assert_equal({'a1_1'=>'a1:a', 'a1_2'=>'a1:b', 'a1_3'=>'a1:c'},@ds.labels)
168
+ end
169
+ def test_split_by_separator
170
+ @ds.add_vectors_by_split("a1","_")
171
+ assert_equal(%w{id name age city a1 a1_a a1_b a1_c},@ds.fields)
172
+ assert_equal([1,0,1,nil,1],@ds.col('a1_a').to_a)
173
+ assert_equal([1,1,0,nil,1],@ds.col('a1_b').to_a)
174
+ assert_equal([0,1,0,nil,1],@ds.col('a1_c').to_a)
175
+ end
176
+
177
+ def test_add_case
178
+ ds=Statsample::Dataset.new({'a'=>[].to_vector, 'b'=>[].to_vector, 'c'=>[].to_vector})
179
+ ds.add_case([1,2,3])
180
+ ds.add_case({'a'=>4,'b'=>5,'c'=>6})
181
+ ds.add_case([[7,8,9],%w{a b c}])
182
+ assert_equal({'a'=>1,'b'=>2,'c'=>3},ds.case_as_hash(0))
183
+ assert_equal([4,5,6],ds.case_as_array(1))
184
+ assert_equal([7,8,9],ds.case_as_array(2))
185
+ assert_equal(['a','b','c'],ds.case_as_array(3))
186
+ ds.add_case_array([6,7,1])
187
+ ds.update_valid_data
188
+ assert_equal([6,7,1],ds.case_as_array(4))
189
+
190
+ end
191
+ def test_marshaling
192
+ ds_marshal=Marshal.load(Marshal.dump(@ds))
193
+ assert_equal(ds_marshal,@ds)
194
+ end
195
+ def test_range
196
+ v1=[1,2,3,4].to_vector
197
+ v2=[5,6,7,8].to_vector
198
+ v3=[9,10,11,12].to_vector
199
+ ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2,'v3'=>v3}, %w{v3 v2 v1})
200
+ assert_same(v1,ds1['v1'])
201
+ ds2=ds1["v2".."v1"]
202
+ assert_equal(%w{v2 v1},ds2.fields)
203
+ assert_same(ds1['v1'],ds2['v1'])
204
+ assert_same(ds1['v2'],ds2['v2'])
205
+
206
+
207
+ end
208
+ def test_dup
209
+ v1=[1,2,3,4].to_vector
210
+ v2=[5,6,7,8].to_vector
211
+ ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2}, %w{v2 v1})
212
+ ds2=ds1.dup
213
+ assert_equal(ds1,ds2)
214
+ assert_not_same(ds1,ds2)
215
+ assert_equal(ds1['v1'],ds2['v1'])
216
+ assert_not_same(ds1['v1'],ds2['v1'])
217
+ assert_equal(ds1.fields,ds2.fields)
218
+ assert_not_same(ds1.fields,ds2.fields)
219
+ ds1['v1'].type=:scale
220
+ # dup partial
221
+ ds3=ds1.dup('v1')
222
+ ds_exp=Statsample::Dataset.new({'v1'=>v1},%w{v1})
223
+ assert_equal(ds_exp,ds3)
224
+ assert_not_same(ds_exp,ds3)
225
+ assert_equal(ds3['v1'],ds_exp['v1'])
226
+ assert_not_same(ds3['v1'],ds_exp['v1'])
227
+ assert_equal(ds3.fields,ds_exp.fields)
228
+ assert_not_same(ds3.fields,ds_exp.fields)
229
+
230
+
231
+ # empty
232
+ ds3=ds1.dup_empty
233
+ assert_not_equal(ds1,ds3)
234
+ assert_not_equal(ds1['v1'],ds3['v1'])
235
+ assert_equal([],ds3['v1'].data)
236
+ assert_equal([],ds3['v2'].data)
237
+ assert_equal(:scale,ds3['v1'].type)
238
+ assert_equal(ds1.fields,ds2.fields)
239
+ assert_not_same(ds1.fields,ds2.fields)
240
+ end
241
+ def test_from_to
242
+ assert_equal(%w{name age city}, @ds.from_to("name","city"))
243
+ assert_raise ArgumentError do
244
+ @ds.from_to("name","a2")
245
+ end
246
+ end
247
+ def test_dup_only_valid
248
+ v1=[1,nil,3,4].to_vector(:scale)
249
+ v2=[5,6,nil,8].to_vector(:scale)
250
+ v3=[9,10,11,12].to_vector(:scale)
251
+ ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2,'v3'=>v3})
252
+ ds2=ds1.dup_only_valid
253
+ expected=Statsample::Dataset.new({'v1'=>[1,4].to_vector(:scale), 'v2'=> [5,8].to_vector(:scale), 'v3'=>[9, 12].to_vector(:scale)})
254
+ assert_equal(expected,ds2)
255
+ assert_equal(expected.vectors.values,Statsample::only_valid(v1,v2,v3))
256
+ end
257
+ def test_filter
258
+ @ds['age'].type=:scale
259
+ filtered=@ds.filter{|c| c['id']==2 or c['id']==4}
260
+ expected=Statsample::Dataset.new({'id' => Statsample::Vector.new([2,4]), 'name'=>Statsample::Vector.new(%w{Claude Franz}), 'age'=>Statsample::Vector.new([23,27],:scale),
261
+ 'city'=>Statsample::Vector.new(['London','Paris']),
262
+ 'a1'=>Statsample::Vector.new(['b,c',nil,])}, ['id','name','age','city','a1'])
263
+ assert_equal(expected,filtered)
264
+ end
265
+ def test_filter_field
266
+ @ds['age'].type=:scale
267
+ filtered=@ds.filter_field('id') {|c| c['id']==2 or c['id']==4}
268
+ expected=[2,4].to_vector
269
+ assert_equal(expected,filtered)
270
+
271
+ end
272
+ def test_verify
273
+ name=%w{r1 r2 r3 r4}.to_vector(:nominal)
274
+ v1=[1,2,3,4].to_vector(:scale)
275
+ v2=[4,3,2,1].to_vector(:scale)
276
+ v3=[10,20,30,40].to_vector(:scale)
277
+ v4=%w{a b a b}.to_vector(:nominal)
278
+ ds={'v1'=>v1,'v2'=>v2,'v3'=>v3,'v4'=>v4,'id'=>name}.to_dataset
279
+ ds.fields=%w{v1 v2 v3 v4 id}
280
+ #Correct
281
+ t1=create_test("If v4=a, v1 odd") {|r| r['v4']=='b' or (r['v4']=='a' and r['v1']%2==1)}
282
+ t2=create_test("v3=v1*10") {|r| r['v3']==r['v1']*10}
283
+ # Fail!
284
+ t3=create_test("v4='b'") {|r| r['v4']=='b'}
285
+ exp1=["1 [1]: v4='b'", "3 [3]: v4='b'"]
286
+ exp2=["1 [r1]: v4='b'", "3 [r3]: v4='b'"]
287
+ res=ds.verify(t3,t1,t2)
288
+ assert_equal(exp1,res)
289
+ res=ds.verify('id',t1,t2,t3)
290
+ assert_equal(exp2,res)
291
+
292
+ end
293
+ end