statsample 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. data/History.txt +79 -0
  2. data/Manifest.txt +56 -0
  3. data/README.txt +77 -0
  4. data/Rakefile +22 -0
  5. data/bin/statsample +2 -0
  6. data/demo/benchmark.rb +52 -0
  7. data/demo/chi-square.rb +44 -0
  8. data/demo/dice.rb +13 -0
  9. data/demo/distribution_t.rb +95 -0
  10. data/demo/graph.rb +9 -0
  11. data/demo/item_analysis.rb +30 -0
  12. data/demo/mean.rb +81 -0
  13. data/demo/proportion.rb +57 -0
  14. data/demo/sample_test.csv +113 -0
  15. data/demo/strata_proportion.rb +152 -0
  16. data/demo/stratum.rb +141 -0
  17. data/lib/spss.rb +131 -0
  18. data/lib/statsample.rb +216 -0
  19. data/lib/statsample/anova.rb +74 -0
  20. data/lib/statsample/bivariate.rb +255 -0
  21. data/lib/statsample/chidistribution.rb +39 -0
  22. data/lib/statsample/codification.rb +120 -0
  23. data/lib/statsample/converters.rb +338 -0
  24. data/lib/statsample/crosstab.rb +122 -0
  25. data/lib/statsample/dataset.rb +526 -0
  26. data/lib/statsample/dominanceanalysis.rb +259 -0
  27. data/lib/statsample/dominanceanalysis/bootstrap.rb +126 -0
  28. data/lib/statsample/graph/gdchart.rb +45 -0
  29. data/lib/statsample/graph/svgboxplot.rb +108 -0
  30. data/lib/statsample/graph/svggraph.rb +181 -0
  31. data/lib/statsample/graph/svghistogram.rb +208 -0
  32. data/lib/statsample/graph/svgscatterplot.rb +111 -0
  33. data/lib/statsample/htmlreport.rb +232 -0
  34. data/lib/statsample/multiset.rb +281 -0
  35. data/lib/statsample/regression.rb +522 -0
  36. data/lib/statsample/reliability.rb +235 -0
  37. data/lib/statsample/resample.rb +20 -0
  38. data/lib/statsample/srs.rb +159 -0
  39. data/lib/statsample/test.rb +25 -0
  40. data/lib/statsample/vector.rb +759 -0
  41. data/test/_test_chart.rb +58 -0
  42. data/test/test_anova.rb +31 -0
  43. data/test/test_codification.rb +59 -0
  44. data/test/test_crosstab.rb +55 -0
  45. data/test/test_csv.csv +7 -0
  46. data/test/test_csv.rb +27 -0
  47. data/test/test_dataset.rb +293 -0
  48. data/test/test_ggobi.rb +42 -0
  49. data/test/test_multiset.rb +98 -0
  50. data/test/test_regression.rb +108 -0
  51. data/test/test_reliability.rb +32 -0
  52. data/test/test_resample.rb +23 -0
  53. data/test/test_srs.rb +14 -0
  54. data/test/test_statistics.rb +152 -0
  55. data/test/test_stratified.rb +19 -0
  56. data/test/test_svg_graph.rb +63 -0
  57. data/test/test_vector.rb +265 -0
  58. data/test/test_xls.rb +32 -0
  59. metadata +158 -0
@@ -0,0 +1,58 @@
1
+ require File.dirname(__FILE__)+'/../lib/statsample'
2
+ require 'tempfile'
3
+ require 'test/unit'
4
+ require 'statsample/chart/gdchart'
5
+ # Not included on default test, because GDChart send a lot of warnings!
6
+ class StatsampleChartTestCase < Test::Unit::TestCase
7
+
8
+ def initialize(*args)
9
+ @image_path=File.dirname(__FILE__)+"/images"
10
+ super
11
+ end
12
+
13
+ def test_base_chart
14
+ file=@image_path+"/gdchart_base_bar_1.jpg"
15
+ width=500
16
+ height=300
17
+ chart_type=GDChart::BAR
18
+ labels=["a","b","c","d","e"]
19
+ options={'set_color'=>[0xFF3399]}
20
+ n_data=1
21
+ data=[10,40,30,20,40]
22
+
23
+ Statsample::Util.chart_gdchart(file,width,height,chart_type, labels, options,n_data,data)
24
+ assert(File.exists?(file))
25
+ %w{STACK_DEPTH STACK_SUM STACK_BESIDE STACK_LAYER}.each{|stack|
26
+ file=@image_path+"/gdchart_base_bar_2_#{stack}.jpg"
27
+ n_data=2
28
+ options={'set_color'=>[0xFF3399,0x33FF99,0xFF99FF,0xFF3399], 'stack_type'=>GDChart.const_get(stack.intern),'title'=>"Bar #{stack}"}
29
+
30
+ chart_type=GDChart::BAR
31
+
32
+ data=[10,15,10,20,30,30,20,5,15,20]
33
+ Statsample::Util.chart_gdchart(file,width,height,chart_type, labels, options,n_data,data)
34
+ assert(File.exists?(file))
35
+ }
36
+ end
37
+ def test_vector
38
+ file=@image_path+"/gdchart_bar.jpg"
39
+ ar=[]
40
+ (1..1000).each {|a|
41
+ ar.push(rand(10))
42
+ }
43
+ vector=ar.to_vector
44
+ file=@image_path+"/gdchart_bar.jpg"
45
+ vector.gdchart_frequencies(file,800,600,GDChart::BAR,'title'=>'Bar')
46
+ assert(File.exists?(file))
47
+ file=@image_path+"/gdchart_bar3d.jpg"
48
+ vector.gdchart_frequencies(file,300,100,GDChart::BAR3D,'title'=>'Bar3D')
49
+ assert(File.exists?(file))
50
+ file=@image_path+"/gdchart_floatingbar.jpg"
51
+ vector.gdchart_frequencies(file,200,200,GDChart::LINE,'title'=>'FloatingBar')
52
+ assert(File.exists?(file))
53
+ vector.type=:scale
54
+ file=@image_path+"/gdchart_histogram.jpg"
55
+ vector.gdchart_histogram(5,file,300,400,GDChart::BAR,'title'=>'Histogram')
56
+ assert(File.exists?(file))
57
+ end
58
+ end
@@ -0,0 +1,31 @@
1
+ require File.dirname(__FILE__)+'/../lib/statsample.rb'
2
+ require 'test/unit'
3
+
4
+ class StatsampleAnovaTestCase < Test::Unit::TestCase
5
+ def initialize(*args)
6
+ @v1=[3,3,2,3,6].to_vector(:scale)
7
+ @v2=[7,6,5,6,7].to_vector(:scale)
8
+ @v3=[9,8,9,7,8].to_vector(:scale)
9
+ @anova=Statsample::Anova::OneWay.new([@v1,@v2,@v3])
10
+ super
11
+ end
12
+ def test_basic # checks sums of squares, degrees of freedom and F for the one-way ANOVA built in initialize
13
+ assert_in_delta(72.933, @anova.sst,0.001) # total sum of squares
14
+ assert_in_delta(14.8,@anova.sswg,0.001) # within-groups sum of squares
15
+ assert_in_delta(58.133,@anova.ssbg,0.001) # between-groups sum of squares
16
+ assert_in_delta(@anova.sst,@anova.sswg+@anova.ssbg,0.00001) # the two components must add up to the total
17
+ assert_equal(14,@anova.df_total) # 15 observations - 1
18
+ assert_equal(12,@anova.df_wg) # 15 observations - 3 groups
19
+ assert_equal(2,@anova.df_bg) # 3 groups - 1
20
+ assert_in_delta(23.568,@anova.f,0.001) # (ssbg/df_bg)/(sswg/df_wg) = (58.133/2)/(14.8/12)
21
+ anova2=Statsample::Anova::OneWay.new([@v1,@v1,@v1,@v1,@v2]) # second design: @v1 four times plus @v2
22
+ assert_in_delta(3.960, anova2.f,0.001)
23
+
24
+ if HAS_GSL # HAS_GSL is not defined in this file - presumably set by the library when GSL is available
25
+ assert(@anova.significance<0.01)
26
+ assert_in_delta(0.016, anova2.significance,0.001)
27
+ else
28
+ puts "Skipped OneWay#significance (no GSL)" # significance is only checked when HAS_GSL is truthy
29
+ end
30
+ end
31
+ end
@@ -0,0 +1,59 @@
1
+ require File.dirname(__FILE__)+'/../lib/statsample'
2
+ require 'tempfile'
3
+ require 'test/unit'
4
+
5
+ class StatsampleCodificationTestCase < Test::Unit::TestCase
6
+
7
+ def initialize(*args)
8
+ v1=%w{run walk,run walking running sleep sleeping,dreaming sleep,dream}.to_vector
9
+ @dict={'run'=>'r','walk'=>'w','walking'=>'w','running'=>'r','sleep'=>'s', 'sleeping'=>'s','dream'=>'d','dreaming'=>'d'}
10
+ @ds={"v1"=>v1}.to_dataset
11
+ super
12
+ end
13
+ def test_create_yaml
14
+ assert_raise ArgumentError do
15
+ Statsample::Codification.create_yaml(@ds,[])
16
+ end
17
+ expected_keys_v1=%w{run walk walking running sleep sleeping dream dreaming}.sort
18
+ yaml_hash=Statsample::Codification.create_yaml(@ds,['v1'])
19
+ h=YAML::load(yaml_hash)
20
+ assert_equal(['v1'],h.keys)
21
+ assert_equal(expected_keys_v1,h['v1'].keys.sort)
22
+ tf = Tempfile.new("test_codification")
23
+ yaml_hash=Statsample::Codification.create_yaml(@ds,['v1'],Statsample::SPLIT_TOKEN,tf)
24
+ tf.close
25
+ tf.open
26
+ h=YAML::load(tf)
27
+ assert_equal(['v1'],h.keys)
28
+ assert_equal(expected_keys_v1,h['v1'].keys.sort)
29
+ tf.close(true)
30
+ end
31
+ def test_recodification
32
+ expected=[['r'],['w','r'],['w'],['r'],['s'],['s','d'], ['s','d']]
33
+ assert_equal(expected,Statsample::Codification.recode_vector(@ds['v1'],@dict))
34
+ v2=['run','walk,dreaming',nil,'walk,dream,dreaming,walking'].to_vector
35
+ expected=[['r'],['w','d'],nil,['w','d']]
36
+ assert_equal(expected,Statsample::Codification.recode_vector(v2,@dict))
37
+ end
38
+ def test_recode_dataset_simple
39
+ yaml=YAML::dump({'v1'=>@dict})
40
+ Statsample::Codification.recode_dataset_simple!(@ds,yaml)
41
+ expected_vector=['r','w,r','w','r','s','s,d', 's,d'].to_vector
42
+ assert_not_equal(expected_vector,@ds['v1'])
43
+ assert_equal(expected_vector,@ds['v1_recoded'])
44
+ end
45
+ def test_recode_dataset_split
46
+ yaml=YAML::dump({'v1'=>@dict})
47
+ Statsample::Codification.recode_dataset_split!(@ds,yaml)
48
+ e={}
49
+ e['r']=[1,1,0,1,0,0,0].to_vector
50
+ e['w']=[0,1,1,0,0,0,0].to_vector
51
+ e['s']=[0,0,0,0,1,1,1].to_vector
52
+ e['d']=[0,0,0,0,0,1,1].to_vector
53
+ e.each{|k,expected|
54
+ assert_equal(expected,@ds['v1_'+k],"Error on key #{k}")
55
+
56
+ }
57
+ end
58
+
59
+ end
@@ -0,0 +1,55 @@
1
+ require File.dirname(__FILE__)+'/../lib/statsample'
2
+ require 'statsample/crosstab'
3
+ require 'test/unit'
4
+
5
+ class StatsampleCrosstabTestCase < Test::Unit::TestCase
6
+
7
+ def initialize(*args)
8
+ @v1=%w{black blonde black black red black brown black blonde black red black blonde}.to_vector
9
+ @v2=%w{woman man man woman man man man woman man woman woman man man}.to_vector
10
+ @ct=Statsample::Crosstab.new(@v1,@v2)
11
+ super
12
+ end
13
+ def test_crosstab_errors # Crosstab.new must raise ArgumentError for the two bad inputs below and accept the fixture vectors
14
+ e1=%w{black blonde black black red black brown black blonde black} # plain Array (no to_vector), 10 items
15
+ assert_raise ArgumentError do
16
+ Statsample::Crosstab.new(e1,@v2)
17
+ end
18
+ e2=%w{black blonde black black red black brown black blonde black black}.to_vector # a vector, but 11 items against the 13 of @v2
19
+
20
+ assert_raise ArgumentError do
21
+ Statsample::Crosstab.new(e2,@v2) # presumably rejected for the size mismatch - confirm in Crosstab#initialize
22
+ end
23
+ assert_nothing_raised do
24
+ Statsample::Crosstab.new(@v1,@v2) # both fixture vectors hold 13 items
25
+ end
26
+ end
27
+ def test_crosstab_basic
28
+ assert_equal(%w{black blonde brown red}, @ct.rows_names)
29
+ assert_equal(%w{man woman}, @ct.cols_names)
30
+ assert_equal({'black'=>7,'blonde'=>3,'red'=>2,'brown'=>1}, @ct.rows_total)
31
+ assert_equal({'man'=>8,'woman'=>5}, @ct.cols_total)
32
+ end
33
+ def test_crosstab_frequencies
34
+ fq=@ct.frequencies
35
+ assert_equal(8,fq.size)
36
+ sum=fq.inject(0) {|s,x| s+x[1]}
37
+ assert_equal(13,sum)
38
+ fr=@ct.frequencies_by_row
39
+ assert_equal(4,fr.size)
40
+ assert_equal(%w{black blonde brown red},fr.keys.sort)
41
+ fc=@ct.frequencies_by_col
42
+ assert_equal(2,fc.size)
43
+ assert_equal(%w{man woman},fc.keys.sort)
44
+ assert_equal(Matrix.rows([[3,4],[3,0],[1,0],[1,1]]),@ct.to_matrix)
45
+ end
46
+ def test_expected
47
+ v1=%w{1 1 1 1 1 0 0 0 0 0}.to_vector
48
+ v2=%w{0 0 0 0 0 1 1 1 1 1}.to_vector
49
+ ct=Statsample::Crosstab.new(v1,v2)
50
+ assert_equal(Matrix[[2.5,2.5],[2.5,2.5]],ct.matrix_expected)
51
+ end
52
+ def test_to_s # smoke test: the text rendering of the crosstab mentions a column name
53
+ assert_match(/man\s+|\s+woman/,@ct.to_s) # NOTE(review): the unescaped | is an alternation ("man"+spaces OR spaces+"woman"); if a literal column separator was meant it needs \| - confirm against Crosstab#to_s
54
+ end
55
+ end
data/test/test_csv.csv ADDED
@@ -0,0 +1,7 @@
1
+ "id","name","age","city","a1"
2
+ 1,"Alex",20,"New York","a,b"
3
+ 2,"Claude",23,"London","b,c"
4
+ 3,"Peter",25,"London","a"
5
+ 4,"Franz",27,"Paris",
6
+ 5,"George",5,"Tome","a,b,c"
7
+ 6,"Fernand",,,
data/test/test_csv.rb ADDED
@@ -0,0 +1,27 @@
1
+ require File.dirname(__FILE__)+'/../lib/statsample'
2
+ require 'tmpdir'
3
+ require 'test/unit'
4
+
5
+ class StatsampleCSVTestCase < Test::Unit::TestCase
6
+ def initialize(*args)
7
+ @ds=Statsample::CSV.read(File.dirname(__FILE__)+"/test_csv.csv")
8
+ super
9
+ end
10
+ def test_read
11
+ assert_equal(6,@ds.cases)
12
+ assert_equal(%w{id name age city a1},@ds.fields)
13
+ end
14
+ def test_nil
15
+ assert_equal(nil,@ds['age'][5])
16
+ end
17
+ def test_write
18
+ filename=Dir::tmpdir+"/test_write.csv"
19
+ Statsample::CSV.write(@ds,filename)
20
+ ds2=Statsample::CSV.read(filename)
21
+ i=0
22
+ ds2.each_array{|row|
23
+ assert_equal(@ds.case_as_array(i),row)
24
+ i+=1
25
+ }
26
+ end
27
+ end
@@ -0,0 +1,293 @@
1
+ require File.dirname(__FILE__)+'/../lib/statsample'
2
+ require 'test/unit'
3
+
4
+ class StatsampleDatasetTestCase < Test::Unit::TestCase
5
+ def initialize(*args)
6
+ @ds=Statsample::Dataset.new({'id' => Statsample::Vector.new([1,2,3,4,5]), 'name'=>Statsample::Vector.new(%w{Alex Claude Peter Franz George}), 'age'=>Statsample::Vector.new([20,23,25,27,5]),
7
+ 'city'=>Statsample::Vector.new(['New York','London','London','Paris','Tome']),
8
+ 'a1'=>Statsample::Vector.new(['a,b','b,c','a',nil,'a,b,c'])}, ['id','name','age','city','a1'])
9
+ super
10
+ end
11
+ def test_basic
12
+ assert_equal(5,@ds.cases)
13
+ assert_equal(%w{id name age city a1}, @ds.fields)
14
+ end
15
+ def test_matrix
16
+ matrix=Matrix[[1,2],[3,4],[5,6]]
17
+ ds=Statsample::Dataset.new('v1'=>[1,3,5].to_vector,'v2'=>[2,4,6].to_vector)
18
+ assert_equal(matrix,ds.to_matrix)
19
+ end
20
+
21
+ def test_fields # checks reordering through Dataset#fields= with a full and with a partial field list
22
+ @ds.fields=%w{name a1 id age city} # full list: the order is taken as given
23
+ assert_equal(%w{name a1 id age city}, @ds.fields)
24
+ @ds.fields=%w{id name age} # partial list: a1 and city are left out
25
+ assert_equal(%w{id name age a1 city}, @ds.fields) # listed fields come first; the omitted ones are expected after them as a1, city
26
+ end
27
+ def test_each_vector
28
+ a=[1,2,3].to_vector
29
+ b=[3,4,5].to_vector
30
+ fields=["a","b"]
31
+ ds=Statsample::Dataset.new({'a'=>a,'b'=>b},fields)
32
+ res=[]
33
+ ds.each_vector{|k,v|
34
+ res.push([k,v])
35
+ }
36
+ assert_equal([["a",a],["b",b]],res)
37
+ ds.fields=["b","a"]
38
+ res=[]
39
+ ds.each_vector{|k,v|
40
+ res.push([k,v])
41
+ }
42
+ assert_equal([["b",b],["a",a]],res)
43
+ end
44
+ def test_equality
45
+ v1=[1,2,3,4].to_vector
46
+ v2=[5,6,7,8].to_vector
47
+ ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2}, %w{v2 v1})
48
+ v3=[1,2,3,4].to_vector
49
+ v4=[5,6,7,8].to_vector
50
+ ds2=Statsample::Dataset.new({'v1'=>v3,'v2'=>v4}, %w{v2 v1})
51
+ assert_equal(ds1,ds2)
52
+ ds2.fields=%w{v1 v2}
53
+ assert_not_equal(ds1,ds2)
54
+ end
55
+ def test_add_vector
56
+ v=Statsample::Vector.new(%w{a b c d e})
57
+ @ds.add_vector('new',v)
58
+ assert_equal(%w{id name age city a1 new},@ds.fields)
59
+ x=Statsample::Vector.new(%w{a b c d e f g})
60
+ assert_raise ArgumentError do
61
+ @ds.add_vector('new2',x)
62
+ end
63
+ end
64
+ def test_vector_by_calculation
65
+ a1=[1,2,3,4,5,6,7].to_vector(:scale)
66
+ a2=[10,20,30,40,50,60,70].to_vector(:scale)
67
+ a3=[100,200,300,400,500,600,700].to_vector(:scale)
68
+ ds={'a1'=>a1,'a2'=>a2,'a3'=>a3}.to_dataset
69
+ total=ds.vector_by_calculation() {|row|
70
+ row['a1']+row['a2']+row['a3']
71
+ }
72
+ expected=[111,222,333,444,555,666,777].to_vector(:scale)
73
+ assert_equal(expected,total)
74
+ end
75
+ def test_vector_sum
76
+ a1=[1 ,2 ,3 ,4 , 5,nil].to_vector(:scale)
77
+ a2=[10 ,10,20,20 ,20,30].to_vector(:scale)
78
+ b1=[nil,1 ,1 ,1 ,1 ,2].to_vector(:scale)
79
+ b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
80
+ ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2}.to_dataset
81
+ total=ds.vector_sum
82
+ a=ds.vector_sum(['a1','a2'])
83
+ b=ds.vector_sum(['b1','b2'])
84
+ expected_a=[11,12,23,24,25,nil].to_vector(:scale)
85
+ expected_b=[nil,3,3,nil,3,5].to_vector(:scale)
86
+ expected_total=[nil,15,26,nil,28,nil].to_vector(:scale)
87
+ assert_equal(expected_a, a)
88
+ assert_equal(expected_b, b)
89
+ assert_equal(expected_total, total)
90
+ end
91
+ def test_vector_missing_values
92
+ a1=[1 ,nil ,3 ,4 , 5,nil].to_vector(:scale)
93
+ a2=[10 ,nil ,20,20 ,20,30].to_vector(:scale)
94
+ b1=[nil,nil ,1 ,1 ,1 ,2].to_vector(:scale)
95
+ b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
96
+ c= [nil,2 , 4,2 ,2 ,2].to_vector(:scale)
97
+ ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
98
+ mva=[2,3,0,1,0,1].to_vector(:scale)
99
+ assert_equal(mva,ds.vector_missing_values)
100
+ end
101
+ def test_vector_count_characters
102
+ a1=[1 ,"abcde" ,3 ,4 , 5,nil].to_vector(:scale)
103
+ a2=[10 ,20.3 ,20 ,20 ,20,30].to_vector(:scale)
104
+ b1=[nil,"343434" ,1 ,1 ,1 ,2].to_vector(:scale)
105
+ b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
106
+ c= [nil,2 ,"This is a nice example",2 ,2 ,2].to_vector(:scale)
107
+ ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
108
+ exp=[4,17,27,5,6,5].to_vector(:scale)
109
+ assert_equal(exp,ds.vector_count_characters)
110
+
111
+ end
112
+ def test_vector_mean
113
+ a1=[1 ,2 ,3 ,4 , 5,nil].to_vector(:scale)
114
+ a2=[10 ,10,20,20 ,20,30].to_vector(:scale)
115
+ b1=[nil,1 ,1 ,1 ,1 ,2].to_vector(:scale)
116
+ b2=[2 ,2 ,2 ,nil,2 ,3].to_vector(:scale)
117
+ c= [nil,2, 4,2 ,2 ,2].to_vector(:scale)
118
+ ds={'a1'=>a1,'a2'=>a2,'b1'=>b1,'b2'=>b2,'c'=>c}.to_dataset
119
+ total=ds.vector_mean
120
+ a=ds.vector_mean(['a1','a2'],1)
121
+ b=ds.vector_mean(['b1','b2'],1)
122
+ c=ds.vector_mean(['b1','b2','c'],1)
123
+ expected_a=[5.5,6,11.5,12,12.5,30].to_vector(:scale)
124
+ expected_b=[2,1.5,1.5,1,1.5,2.5].to_vector(:scale)
125
+ expected_c=[nil, 5.0/3,7.0/3,1.5,5.0/3,7.0/3].to_vector(:scale)
126
+ expected_total=[nil,3.4,6,nil,6.0,nil].to_vector(:scale)
127
+ assert_equal(expected_a, a)
128
+ assert_equal(expected_b, b)
129
+ assert_equal(expected_c, c)
130
+ assert_equal(expected_total, total)
131
+ end
132
+
133
+ def test_each_array
134
+ expected=[[1,'Alex',20,'New York','a,b'], [2,'Claude',23,'London','b,c'], [3,'Peter',25,'London','a'],[4,'Franz', 27,'Paris',nil],[5,'George',5,'Tome','a,b,c']]
135
+ out=[]
136
+ @ds.each_array{ |a|
137
+ out.push(a)
138
+ }
139
+ assert_equal(expected,out)
140
+ end
141
+ def test_recode
142
+ @ds['age'].type=:scale
143
+ @ds.recode!("age") {|c| c['id']*2}
144
+ expected=[2,4,6,8,10].to_vector(:scale)
145
+ assert_equal(expected,@ds['age'])
146
+ end
147
+ def test_case_as
148
+ assert_equal({'id'=>1,'name'=>'Alex','city'=>'New York','age'=>20,'a1'=>'a,b'},@ds.case_as_hash(0))
149
+ assert_equal([5,'George',5,'Tome','a,b,c'],@ds.case_as_array(4))
150
+
151
+ end
152
+ def test_delete_vector
153
+ @ds.delete_vector('name')
154
+ assert_equal(%w{id age city a1},@ds.fields)
155
+ assert_equal(%w{a1 age city id},@ds.vectors.keys.sort)
156
+ end
157
+ def test_change_type
158
+ @ds.col('age').type=:scale
159
+ assert_equal(:scale,@ds.col('age').type)
160
+ end
161
+ def test_split_by_separator_recode
162
+ @ds.add_vectors_by_split_recode("a1","_")
163
+ assert_equal(%w{id name age city a1 a1_1 a1_2 a1_3},@ds.fields)
164
+ assert_equal([1,0,1,nil,1],@ds.col('a1_1').to_a)
165
+ assert_equal([1,1,0,nil,1],@ds.col('a1_2').to_a)
166
+ assert_equal([0,1,0,nil,1],@ds.col('a1_3').to_a)
167
+ assert_equal({'a1_1'=>'a1:a', 'a1_2'=>'a1:b', 'a1_3'=>'a1:c'},@ds.labels)
168
+ end
169
+ def test_split_by_separator
170
+ @ds.add_vectors_by_split("a1","_")
171
+ assert_equal(%w{id name age city a1 a1_a a1_b a1_c},@ds.fields)
172
+ assert_equal([1,0,1,nil,1],@ds.col('a1_a').to_a)
173
+ assert_equal([1,1,0,nil,1],@ds.col('a1_b').to_a)
174
+ assert_equal([0,1,0,nil,1],@ds.col('a1_c').to_a)
175
+ end
176
+
177
+ def test_add_case
178
+ ds=Statsample::Dataset.new({'a'=>[].to_vector, 'b'=>[].to_vector, 'c'=>[].to_vector})
179
+ ds.add_case([1,2,3])
180
+ ds.add_case({'a'=>4,'b'=>5,'c'=>6})
181
+ ds.add_case([[7,8,9],%w{a b c}])
182
+ assert_equal({'a'=>1,'b'=>2,'c'=>3},ds.case_as_hash(0))
183
+ assert_equal([4,5,6],ds.case_as_array(1))
184
+ assert_equal([7,8,9],ds.case_as_array(2))
185
+ assert_equal(['a','b','c'],ds.case_as_array(3))
186
+ ds.add_case_array([6,7,1])
187
+ ds.update_valid_data
188
+ assert_equal([6,7,1],ds.case_as_array(4))
189
+
190
+ end
191
+ def test_marshaling
192
+ ds_marshal=Marshal.load(Marshal.dump(@ds))
193
+ assert_equal(ds_marshal,@ds)
194
+ end
195
+ def test_range
196
+ v1=[1,2,3,4].to_vector
197
+ v2=[5,6,7,8].to_vector
198
+ v3=[9,10,11,12].to_vector
199
+ ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2,'v3'=>v3}, %w{v3 v2 v1})
200
+ assert_same(v1,ds1['v1'])
201
+ ds2=ds1["v2".."v1"]
202
+ assert_equal(%w{v2 v1},ds2.fields)
203
+ assert_same(ds1['v1'],ds2['v1'])
204
+ assert_same(ds1['v2'],ds2['v2'])
205
+
206
+
207
+ end
208
+ def test_dup
209
+ v1=[1,2,3,4].to_vector
210
+ v2=[5,6,7,8].to_vector
211
+ ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2}, %w{v2 v1})
212
+ ds2=ds1.dup
213
+ assert_equal(ds1,ds2)
214
+ assert_not_same(ds1,ds2)
215
+ assert_equal(ds1['v1'],ds2['v1'])
216
+ assert_not_same(ds1['v1'],ds2['v1'])
217
+ assert_equal(ds1.fields,ds2.fields)
218
+ assert_not_same(ds1.fields,ds2.fields)
219
+ ds1['v1'].type=:scale
220
+ # dup partial
221
+ ds3=ds1.dup('v1')
222
+ ds_exp=Statsample::Dataset.new({'v1'=>v1},%w{v1})
223
+ assert_equal(ds_exp,ds3)
224
+ assert_not_same(ds_exp,ds3)
225
+ assert_equal(ds3['v1'],ds_exp['v1'])
226
+ assert_not_same(ds3['v1'],ds_exp['v1'])
227
+ assert_equal(ds3.fields,ds_exp.fields)
228
+ assert_not_same(ds3.fields,ds_exp.fields)
229
+
230
+
231
+ # empty
232
+ ds3=ds1.dup_empty
233
+ assert_not_equal(ds1,ds3)
234
+ assert_not_equal(ds1['v1'],ds3['v1'])
235
+ assert_equal([],ds3['v1'].data)
236
+ assert_equal([],ds3['v2'].data)
237
+ assert_equal(:scale,ds3['v1'].type)
238
+ assert_equal(ds1.fields,ds2.fields)
239
+ assert_not_same(ds1.fields,ds2.fields)
240
+ end
241
+ def test_from_to
242
+ assert_equal(%w{name age city}, @ds.from_to("name","city"))
243
+ assert_raise ArgumentError do
244
+ @ds.from_to("name","a2")
245
+ end
246
+ end
247
+ def test_dup_only_valid
248
+ v1=[1,nil,3,4].to_vector(:scale)
249
+ v2=[5,6,nil,8].to_vector(:scale)
250
+ v3=[9,10,11,12].to_vector(:scale)
251
+ ds1=Statsample::Dataset.new({'v1'=>v1,'v2'=>v2,'v3'=>v3})
252
+ ds2=ds1.dup_only_valid
253
+ expected=Statsample::Dataset.new({'v1'=>[1,4].to_vector(:scale), 'v2'=> [5,8].to_vector(:scale), 'v3'=>[9, 12].to_vector(:scale)})
254
+ assert_equal(expected,ds2)
255
+ assert_equal(expected.vectors.values,Statsample::only_valid(v1,v2,v3))
256
+ end
257
+ def test_filter
258
+ @ds['age'].type=:scale
259
+ filtered=@ds.filter{|c| c['id']==2 or c['id']==4}
260
+ expected=Statsample::Dataset.new({'id' => Statsample::Vector.new([2,4]), 'name'=>Statsample::Vector.new(%w{Claude Franz}), 'age'=>Statsample::Vector.new([23,27],:scale),
261
+ 'city'=>Statsample::Vector.new(['London','Paris']),
262
+ 'a1'=>Statsample::Vector.new(['b,c',nil,])}, ['id','name','age','city','a1'])
263
+ assert_equal(expected,filtered)
264
+ end
265
+ def test_filter_field
266
+ @ds['age'].type=:scale
267
+ filtered=@ds.filter_field('id') {|c| c['id']==2 or c['id']==4}
268
+ expected=[2,4].to_vector
269
+ assert_equal(expected,filtered)
270
+
271
+ end
272
+ def test_verify
273
+ name=%w{r1 r2 r3 r4}.to_vector(:nominal)
274
+ v1=[1,2,3,4].to_vector(:scale)
275
+ v2=[4,3,2,1].to_vector(:scale)
276
+ v3=[10,20,30,40].to_vector(:scale)
277
+ v4=%w{a b a b}.to_vector(:nominal)
278
+ ds={'v1'=>v1,'v2'=>v2,'v3'=>v3,'v4'=>v4,'id'=>name}.to_dataset
279
+ ds.fields=%w{v1 v2 v3 v4 id}
280
+ #Correct
281
+ t1=create_test("If v4=a, v1 odd") {|r| r['v4']=='b' or (r['v4']=='a' and r['v1']%2==1)}
282
+ t2=create_test("v3=v1*10") {|r| r['v3']==r['v1']*10}
283
+ # Fail!
284
+ t3=create_test("v4='b'") {|r| r['v4']=='b'}
285
+ exp1=["1 [1]: v4='b'", "3 [3]: v4='b'"]
286
+ exp2=["1 [r1]: v4='b'", "3 [r3]: v4='b'"]
287
+ res=ds.verify(t3,t1,t2)
288
+ assert_equal(exp1,res)
289
+ res=ds.verify('id',t1,t2,t3)
290
+ assert_equal(exp2,res)
291
+
292
+ end
293
+ end