statsample 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +79 -0
- data/Manifest.txt +56 -0
- data/README.txt +77 -0
- data/Rakefile +22 -0
- data/bin/statsample +2 -0
- data/demo/benchmark.rb +52 -0
- data/demo/chi-square.rb +44 -0
- data/demo/dice.rb +13 -0
- data/demo/distribution_t.rb +95 -0
- data/demo/graph.rb +9 -0
- data/demo/item_analysis.rb +30 -0
- data/demo/mean.rb +81 -0
- data/demo/proportion.rb +57 -0
- data/demo/sample_test.csv +113 -0
- data/demo/strata_proportion.rb +152 -0
- data/demo/stratum.rb +141 -0
- data/lib/spss.rb +131 -0
- data/lib/statsample.rb +216 -0
- data/lib/statsample/anova.rb +74 -0
- data/lib/statsample/bivariate.rb +255 -0
- data/lib/statsample/chidistribution.rb +39 -0
- data/lib/statsample/codification.rb +120 -0
- data/lib/statsample/converters.rb +338 -0
- data/lib/statsample/crosstab.rb +122 -0
- data/lib/statsample/dataset.rb +526 -0
- data/lib/statsample/dominanceanalysis.rb +259 -0
- data/lib/statsample/dominanceanalysis/bootstrap.rb +126 -0
- data/lib/statsample/graph/gdchart.rb +45 -0
- data/lib/statsample/graph/svgboxplot.rb +108 -0
- data/lib/statsample/graph/svggraph.rb +181 -0
- data/lib/statsample/graph/svghistogram.rb +208 -0
- data/lib/statsample/graph/svgscatterplot.rb +111 -0
- data/lib/statsample/htmlreport.rb +232 -0
- data/lib/statsample/multiset.rb +281 -0
- data/lib/statsample/regression.rb +522 -0
- data/lib/statsample/reliability.rb +235 -0
- data/lib/statsample/resample.rb +20 -0
- data/lib/statsample/srs.rb +159 -0
- data/lib/statsample/test.rb +25 -0
- data/lib/statsample/vector.rb +759 -0
- data/test/_test_chart.rb +58 -0
- data/test/test_anova.rb +31 -0
- data/test/test_codification.rb +59 -0
- data/test/test_crosstab.rb +55 -0
- data/test/test_csv.csv +7 -0
- data/test/test_csv.rb +27 -0
- data/test/test_dataset.rb +293 -0
- data/test/test_ggobi.rb +42 -0
- data/test/test_multiset.rb +98 -0
- data/test/test_regression.rb +108 -0
- data/test/test_reliability.rb +32 -0
- data/test/test_resample.rb +23 -0
- data/test/test_srs.rb +14 -0
- data/test/test_statistics.rb +152 -0
- data/test/test_stratified.rb +19 -0
- data/test/test_svg_graph.rb +63 -0
- data/test/test_vector.rb +265 -0
- data/test/test_xls.rb +32 -0
- metadata +158 -0
data/test/_test_chart.rb
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
require File.dirname(__FILE__)+'/../lib/statsample'
|
2
|
+
require 'tempfile'
|
3
|
+
require 'test/unit'
|
4
|
+
require 'statsample/chart/gdchart'
|
5
|
+
# Not included in the default test run, because GDChart emits a lot of warnings!
|
6
|
+
# Exercises the GDChart-backed chart helpers. Every test renders one or more
# images under test/images and asserts that the output file was written.
class StatsampleChartTestCase < Test::Unit::TestCase
  # Fixture lives in setup (not initialize) so that it is rebuilt through the
  # framework's normal per-test hook.
  def setup
    @image_path = File.dirname(__FILE__) + "/images"
  end

  # Renders a single-series bar chart, then one two-series bar chart for each
  # GDChart stack type, through Statsample::Util.chart_gdchart.
  def test_base_chart
    width = 500
    height = 300
    chart_type = GDChart::BAR
    labels = ["a", "b", "c", "d", "e"]

    file = @image_path + "/gdchart_base_bar_1.jpg"
    options = {'set_color' => [0xFF3399]}
    n_data = 1
    data = [10, 40, 30, 20, 40]
    Statsample::Util.chart_gdchart(file, width, height, chart_type, labels, options, n_data, data)
    # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
    assert(File.exist?(file))

    %w{STACK_DEPTH STACK_SUM STACK_BESIDE STACK_LAYER}.each do |stack|
      file = @image_path + "/gdchart_base_bar_2_#{stack}.jpg"
      n_data = 2
      options = {
        'set_color'  => [0xFF3399, 0x33FF99, 0xFF99FF, 0xFF3399],
        'stack_type' => GDChart.const_get(stack.intern),
        'title'      => "Bar #{stack}"
      }
      chart_type = GDChart::BAR
      # Two series of five values each, matching the five labels.
      data = [10, 15, 10, 20, 30, 30, 20, 5, 15, 20]
      Statsample::Util.chart_gdchart(file, width, height, chart_type, labels, options, n_data, data)
      assert(File.exist?(file))
    end
  end

  # Renders frequency charts and a histogram from a vector of 1000 random
  # integers in 0..9.
  def test_vector
    vector = Array.new(1000) { rand(10) }.to_vector

    file = @image_path + "/gdchart_bar.jpg"
    vector.gdchart_frequencies(file, 800, 600, GDChart::BAR, 'title' => 'Bar')
    assert(File.exist?(file))

    file = @image_path + "/gdchart_bar3d.jpg"
    vector.gdchart_frequencies(file, 300, 100, GDChart::BAR3D, 'title' => 'Bar3D')
    assert(File.exist?(file))

    # NOTE(review): file name and title say "floating bar" but the chart type
    # passed is GDChart::LINE -- confirm which one is intended.
    file = @image_path + "/gdchart_floatingbar.jpg"
    vector.gdchart_frequencies(file, 200, 200, GDChart::LINE, 'title' => 'FloatingBar')
    assert(File.exist?(file))

    # The histogram call is made on the vector after switching it to :scale.
    vector.type = :scale
    file = @image_path + "/gdchart_histogram.jpg"
    vector.gdchart_histogram(5, file, 300, 400, GDChart::BAR, 'title' => 'Histogram')
    assert(File.exist?(file))
  end
end
|
data/test/test_anova.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
require File.dirname(__FILE__)+'/../lib/statsample.rb'
|
2
|
+
require 'test/unit'
|
3
|
+
|
4
|
+
# Checks Statsample::Anova::OneWay sums of squares, degrees of freedom,
# F statistic and (when GSL is available) significance.
class StatsampleAnovaTestCase < Test::Unit::TestCase
  # Fixture is built in setup rather than initialize: a failure while
  # constructing the vectors or the OneWay object is then reported as an error
  # of the test itself instead of breaking test-case instantiation.
  def setup
    @v1 = [3, 3, 2, 3, 6].to_vector(:scale)
    @v2 = [7, 6, 5, 6, 7].to_vector(:scale)
    @v3 = [9, 8, 9, 7, 8].to_vector(:scale)
    @anova = Statsample::Anova::OneWay.new([@v1, @v2, @v3])
  end

  # Verifies the one-way ANOVA values for the three-group fixture and the
  # F statistic of a second, five-group analysis.
  def test_basic
    assert_in_delta(72.933, @anova.sst, 0.001)
    assert_in_delta(14.8, @anova.sswg, 0.001)
    assert_in_delta(58.133, @anova.ssbg, 0.001)
    # Total sum of squares must decompose into within + between groups.
    assert_in_delta(@anova.sst, @anova.sswg + @anova.ssbg, 0.00001)

    assert_equal(14, @anova.df_total)
    assert_equal(12, @anova.df_wg)
    assert_equal(2, @anova.df_bg)
    assert_in_delta(23.568, @anova.f, 0.001)

    anova2 = Statsample::Anova::OneWay.new([@v1, @v1, @v1, @v1, @v2])
    assert_in_delta(3.960, anova2.f, 0.001)

    # The significance assertions only run when HAS_GSL is truthy.
    if HAS_GSL
      assert(@anova.significance < 0.01)
      assert_in_delta(0.016, anova2.significance, 0.001)
    else
      puts "Skipped OneWay#significance (no GSL)"
    end
  end
end
|
data/test/test_codification.rb
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
require File.dirname(__FILE__)+'/../lib/statsample'
|
2
|
+
require 'tempfile'
|
3
|
+
require 'test/unit'
|
4
|
+
|
5
|
+
# Tests Statsample::Codification: YAML dictionary generation and the
# recoding of comma-separated vector values through a dictionary.
class StatsampleCodificationTestCase < Test::Unit::TestCase
  # Fixture is built in setup rather than initialize so construction errors
  # are reported against the running test.
  def setup
    v1 = %w{run walk,run walking running sleep sleeping,dreaming sleep,dream}.to_vector
    @dict = {
      'run' => 'r', 'walk' => 'w', 'walking' => 'w', 'running' => 'r',
      'sleep' => 's', 'sleeping' => 's', 'dream' => 'd', 'dreaming' => 'd'
    }
    @ds = {"v1" => v1}.to_dataset
  end

  # create_yaml must reject an empty field list, and must produce the same
  # dictionary skeleton whether it returns a string or writes to an IO.
  def test_create_yaml
    assert_raise ArgumentError do
      Statsample::Codification.create_yaml(@ds, [])
    end

    expected_keys_v1 = %w{run walk walking running sleep sleeping dream dreaming}.sort

    yaml_hash = Statsample::Codification.create_yaml(@ds, ['v1'])
    h = YAML::load(yaml_hash)
    assert_equal(['v1'], h.keys)
    assert_equal(expected_keys_v1, h['v1'].keys.sort)

    tf = Tempfile.new("test_codification")
    begin
      Statsample::Codification.create_yaml(@ds, ['v1'], Statsample::SPLIT_TOKEN, tf)
      # Close and reopen so the YAML is read back from the start of the file.
      tf.close
      tf.open
      h = YAML::load(tf)
      assert_equal(['v1'], h.keys)
      assert_equal(expected_keys_v1, h['v1'].keys.sort)
    ensure
      # Always close and unlink the temp file, even when an assertion fails.
      tf.close(true)
    end
  end

  # recode_vector maps every token of each value through the dictionary;
  # the second case shows duplicates collapsing and nil staying nil.
  def test_recodification
    expected = [['r'], ['w', 'r'], ['w'], ['r'], ['s'], ['s', 'd'], ['s', 'd']]
    assert_equal(expected, Statsample::Codification.recode_vector(@ds['v1'], @dict))

    v2 = ['run', 'walk,dreaming', nil, 'walk,dream,dreaming,walking'].to_vector
    expected = [['r'], ['w', 'd'], nil, ['w', 'd']]
    assert_equal(expected, Statsample::Codification.recode_vector(v2, @dict))
  end

  # recode_dataset_simple! adds a 'v1_recoded' vector and leaves 'v1' as is.
  def test_recode_dataset_simple
    yaml = YAML::dump({'v1' => @dict})
    Statsample::Codification.recode_dataset_simple!(@ds, yaml)
    expected_vector = ['r', 'w,r', 'w', 'r', 's', 's,d', 's,d'].to_vector
    assert_not_equal(expected_vector, @ds['v1'])
    assert_equal(expected_vector, @ds['v1_recoded'])
  end

  # recode_dataset_split! adds one 0/1 indicator vector per code ('v1_<code>').
  def test_recode_dataset_split
    yaml = YAML::dump({'v1' => @dict})
    Statsample::Codification.recode_dataset_split!(@ds, yaml)
    indicators = {
      'r' => [1, 1, 0, 1, 0, 0, 0].to_vector,
      'w' => [0, 1, 1, 0, 0, 0, 0].to_vector,
      's' => [0, 0, 0, 0, 1, 1, 1].to_vector,
      'd' => [0, 0, 0, 0, 0, 1, 1].to_vector
    }
    indicators.each do |k, expected|
      assert_equal(expected, @ds['v1_' + k], "Error on key #{k}")
    end
  end
end
|
data/test/test_crosstab.rb
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
require File.dirname(__FILE__)+'/../lib/statsample'
|
2
|
+
require 'statsample/crosstab'
|
3
|
+
require 'test/unit'
|
4
|
+
|
5
|
+
# Tests Statsample::Crosstab built from a hair-colour vector (rows) and a
# gender vector (columns) of 13 cases each.
class StatsampleCrosstabTestCase < Test::Unit::TestCase
  # Fixture is built in setup rather than initialize so a failure in
  # Crosstab.new is reported against the running test.
  def setup
    @v1 = %w{black blonde black black red black brown black blonde black red black blonde}.to_vector
    @v2 = %w{woman man man woman man man man woman man woman woman man man}.to_vector
    @ct = Statsample::Crosstab.new(@v1, @v2)
  end

  # Crosstab.new must raise ArgumentError for a plain Array argument and for
  # vectors of different sizes, and accept two equally sized vectors.
  def test_crosstab_errors
    plain_array = %w{black blonde black black red black brown black blonde black}
    assert_raise ArgumentError do
      Statsample::Crosstab.new(plain_array, @v2)
    end

    # 11 elements against the 13 of @v2.
    short_vector = %w{black blonde black black red black brown black blonde black black}.to_vector
    assert_raise ArgumentError do
      Statsample::Crosstab.new(short_vector, @v2)
    end

    assert_nothing_raised do
      Statsample::Crosstab.new(@v1, @v2)
    end
  end

  # Row/column names and marginal totals.
  def test_crosstab_basic
    assert_equal(%w{black blonde brown red}, @ct.rows_names)
    assert_equal(%w{man woman}, @ct.cols_names)
    assert_equal({'black' => 7, 'blonde' => 3, 'red' => 2, 'brown' => 1}, @ct.rows_total)
    assert_equal({'man' => 8, 'woman' => 5}, @ct.cols_total)
  end

  # Cell frequencies: 4 rows x 2 columns = 8 cells summing to the 13 cases.
  def test_crosstab_frequencies
    fq = @ct.frequencies
    assert_equal(8, fq.size)
    # Each yielded element is indexed with [1] to obtain the cell count.
    sum = fq.inject(0) { |acc, cell| acc + cell[1] }
    assert_equal(13, sum)

    fr = @ct.frequencies_by_row
    assert_equal(4, fr.size)
    assert_equal(%w{black blonde brown red}, fr.keys.sort)

    fc = @ct.frequencies_by_col
    assert_equal(2, fc.size)
    assert_equal(%w{man woman}, fc.keys.sort)

    assert_equal(Matrix.rows([[3, 4], [3, 0], [1, 0], [1, 1]]), @ct.to_matrix)
  end

  # Expected frequencies for a 2x2 table with all marginals equal to 5.
  def test_expected
    v1 = %w{1 1 1 1 1 0 0 0 0 0}.to_vector
    v2 = %w{0 0 0 0 0 1 1 1 1 1}.to_vector
    ct = Statsample::Crosstab.new(v1, v2)
    assert_equal(Matrix[[2.5, 2.5], [2.5, 2.5]], ct.matrix_expected)
  end

  # NOTE(review): the unescaped "|" makes this an alternation ("man" followed
  # by whitespace, OR whitespace followed by "woman"). If the intent is a
  # literal column separator between the two headers, it should be
  # /man\s+\|\s+woman/ -- confirm against Crosstab#to_s output before changing.
  def test_to_s
    assert_match(/man\s+|\s+woman/, @ct.to_s)
  end
end
|
data/test/test_csv.csv
ADDED
data/test/test_csv.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
require File.dirname(__FILE__)+'/../lib/statsample'
|
2
|
+
require 'tmpdir'
|
3
|
+
require 'test/unit'
|
4
|
+
|
5
|
+
# Tests Statsample::CSV reading of test_csv.csv and a write/read round trip.
class StatsampleCSVTestCase < Test::Unit::TestCase
  # The fixture file is read in setup rather than initialize, so an I/O or
  # parse error is reported against the running test.
  def setup
    @ds = Statsample::CSV.read(File.dirname(__FILE__) + "/test_csv.csv")
  end

  # The fixture has 6 cases and the five expected fields, in order.
  def test_read
    assert_equal(6, @ds.cases)
    assert_equal(%w{id name age city a1}, @ds.fields)
  end

  # The 'age' value of the last case must be read as nil.
  def test_nil
    assert_nil(@ds['age'][5])
  end

  # Writes the dataset to a temp file, reads it back and compares row by row.
  def test_write
    filename = File.join(Dir.tmpdir, "test_write.csv")
    begin
      Statsample::CSV.write(@ds, filename)
      ds2 = Statsample::CSV.read(filename)

      rows_checked = 0
      ds2.each_array do |row|
        assert_equal(@ds.case_as_array(rows_checked), row)
        rows_checked += 1
      end
      # Guard against a vacuous pass when nothing (or too little) is read back.
      assert_equal(@ds.cases, rows_checked, "Not every case was read back")
    ensure
      # Do not leave the generated file behind in the temp directory.
      File.delete(filename) if File.exist?(filename)
    end
  end
end
|
data/test/test_dataset.rb
ADDED
@@ -0,0 +1,293 @@
|
|
1
|
+
require File.dirname(__FILE__)+'/../lib/statsample'
|
2
|
+
require 'test/unit'
|
3
|
+
|
4
|
+
# Tests Statsample::Dataset: construction, field ordering, per-row vector
# calculations, recoding, splitting, copying, filtering and verification.
class StatsampleDatasetTestCase < Test::Unit::TestCase
  # Five-case dataset shared by most tests. Built in setup rather than
  # initialize so construction errors are reported against the running test.
  def setup
    @ds = Statsample::Dataset.new({
      'id'   => Statsample::Vector.new([1, 2, 3, 4, 5]),
      'name' => Statsample::Vector.new(%w{Alex Claude Peter Franz George}),
      'age'  => Statsample::Vector.new([20, 23, 25, 27, 5]),
      'city' => Statsample::Vector.new(['New York', 'London', 'London', 'Paris', 'Tome']),
      'a1'   => Statsample::Vector.new(['a,b', 'b,c', 'a', nil, 'a,b,c'])
    }, ['id', 'name', 'age', 'city', 'a1'])
  end

  # Case count and field order of the fixture.
  def test_basic
    assert_equal(5, @ds.cases)
    assert_equal(%w{id name age city a1}, @ds.fields)
  end

  # to_matrix yields one matrix row per case.
  def test_matrix
    matrix = Matrix[[1, 2], [3, 4], [5, 6]]
    ds = Statsample::Dataset.new('v1' => [1, 3, 5].to_vector, 'v2' => [2, 4, 6].to_vector)
    assert_equal(matrix, ds.to_matrix)
  end

  # Assigning fields reorders them; a partial list is completed with the
  # remaining fields (expected here in the order a1, city).
  def test_fields
    @ds.fields = %w{name a1 id age city}
    assert_equal(%w{name a1 id age city}, @ds.fields)
    @ds.fields = %w{id name age}
    assert_equal(%w{id name age a1 city}, @ds.fields)
  end

  # each_vector yields [name, vector] pairs following the current field order.
  def test_each_vector
    a = [1, 2, 3].to_vector
    b = [3, 4, 5].to_vector
    fields = ["a", "b"]
    ds = Statsample::Dataset.new({'a' => a, 'b' => b}, fields)

    res = []
    ds.each_vector { |k, v| res.push([k, v]) }
    assert_equal([["a", a], ["b", b]], res)

    ds.fields = ["b", "a"]
    res = []
    ds.each_vector { |k, v| res.push([k, v]) }
    assert_equal([["b", b], ["a", a]], res)
  end

  # Datasets with equal vectors are equal only while field order also matches.
  def test_equality
    v1 = [1, 2, 3, 4].to_vector
    v2 = [5, 6, 7, 8].to_vector
    ds1 = Statsample::Dataset.new({'v1' => v1, 'v2' => v2}, %w{v2 v1})
    v3 = [1, 2, 3, 4].to_vector
    v4 = [5, 6, 7, 8].to_vector
    ds2 = Statsample::Dataset.new({'v1' => v3, 'v2' => v4}, %w{v2 v1})
    assert_equal(ds1, ds2)
    ds2.fields = %w{v1 v2}
    assert_not_equal(ds1, ds2)
  end

  # add_vector appends a field; a vector of the wrong size raises ArgumentError.
  def test_add_vector
    v = Statsample::Vector.new(%w{a b c d e})
    @ds.add_vector('new', v)
    assert_equal(%w{id name age city a1 new}, @ds.fields)

    x = Statsample::Vector.new(%w{a b c d e f g})
    assert_raise ArgumentError do
      @ds.add_vector('new2', x)
    end
  end

  # vector_by_calculation builds a vector from a per-row block.
  def test_vector_by_calculation
    a1 = [1, 2, 3, 4, 5, 6, 7].to_vector(:scale)
    a2 = [10, 20, 30, 40, 50, 60, 70].to_vector(:scale)
    a3 = [100, 200, 300, 400, 500, 600, 700].to_vector(:scale)
    ds = {'a1' => a1, 'a2' => a2, 'a3' => a3}.to_dataset
    total = ds.vector_by_calculation { |row| row['a1'] + row['a2'] + row['a3'] }
    expected = [111, 222, 333, 444, 555, 666, 777].to_vector(:scale)
    assert_equal(expected, total)
  end

  # vector_sum gives nil for any row with a missing value in the summed fields.
  def test_vector_sum
    a1 = [1, 2, 3, 4, 5, nil].to_vector(:scale)
    a2 = [10, 10, 20, 20, 20, 30].to_vector(:scale)
    b1 = [nil, 1, 1, 1, 1, 2].to_vector(:scale)
    b2 = [2, 2, 2, nil, 2, 3].to_vector(:scale)
    ds = {'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2}.to_dataset

    total = ds.vector_sum
    a = ds.vector_sum(['a1', 'a2'])
    b = ds.vector_sum(['b1', 'b2'])

    expected_a = [11, 12, 23, 24, 25, nil].to_vector(:scale)
    expected_b = [nil, 3, 3, nil, 3, 5].to_vector(:scale)
    expected_total = [nil, 15, 26, nil, 28, nil].to_vector(:scale)
    assert_equal(expected_a, a)
    assert_equal(expected_b, b)
    assert_equal(expected_total, total)
  end

  # vector_missing_values counts the missing values of each row.
  def test_vector_missing_values
    a1 = [1, nil, 3, 4, 5, nil].to_vector(:scale)
    a2 = [10, nil, 20, 20, 20, 30].to_vector(:scale)
    b1 = [nil, nil, 1, 1, 1, 2].to_vector(:scale)
    b2 = [2, 2, 2, nil, 2, 3].to_vector(:scale)
    c  = [nil, 2, 4, 2, 2, 2].to_vector(:scale)
    ds = {'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c}.to_dataset
    mva = [2, 3, 0, 1, 0, 1].to_vector(:scale)
    assert_equal(mva, ds.vector_missing_values)
  end

  # vector_count_characters totals the characters of every value in each row.
  def test_vector_count_characters
    a1 = [1, "abcde", 3, 4, 5, nil].to_vector(:scale)
    a2 = [10, 20.3, 20, 20, 20, 30].to_vector(:scale)
    b1 = [nil, "343434", 1, 1, 1, 2].to_vector(:scale)
    b2 = [2, 2, 2, nil, 2, 3].to_vector(:scale)
    c  = [nil, 2, "This is a nice example", 2, 2, 2].to_vector(:scale)
    ds = {'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c}.to_dataset
    exp = [4, 17, 27, 5, 6, 5].to_vector(:scale)
    assert_equal(exp, ds.vector_count_characters)
  end

  # vector_mean over all fields and over subsets called with a second
  # argument of 1; the expectations show rows with at most one missing value
  # still getting a mean in that case.
  def test_vector_mean
    a1 = [1, 2, 3, 4, 5, nil].to_vector(:scale)
    a2 = [10, 10, 20, 20, 20, 30].to_vector(:scale)
    b1 = [nil, 1, 1, 1, 1, 2].to_vector(:scale)
    b2 = [2, 2, 2, nil, 2, 3].to_vector(:scale)
    c  = [nil, 2, 4, 2, 2, 2].to_vector(:scale)
    ds = {'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c}.to_dataset

    total = ds.vector_mean
    mean_a = ds.vector_mean(['a1', 'a2'], 1)
    mean_b = ds.vector_mean(['b1', 'b2'], 1)
    # Separate name so the input vector `c` is not overwritten by its result.
    mean_c = ds.vector_mean(['b1', 'b2', 'c'], 1)

    expected_a = [5.5, 6, 11.5, 12, 12.5, 30].to_vector(:scale)
    expected_b = [2, 1.5, 1.5, 1, 1.5, 2.5].to_vector(:scale)
    expected_c = [nil, 5.0 / 3, 7.0 / 3, 1.5, 5.0 / 3, 7.0 / 3].to_vector(:scale)
    expected_total = [nil, 3.4, 6, nil, 6.0, nil].to_vector(:scale)
    assert_equal(expected_a, mean_a)
    assert_equal(expected_b, mean_b)
    assert_equal(expected_c, mean_c)
    assert_equal(expected_total, total)
  end

  # each_array yields every case as an array in field order.
  def test_each_array
    expected = [
      [1, 'Alex', 20, 'New York', 'a,b'],
      [2, 'Claude', 23, 'London', 'b,c'],
      [3, 'Peter', 25, 'London', 'a'],
      [4, 'Franz', 27, 'Paris', nil],
      [5, 'George', 5, 'Tome', 'a,b,c']
    ]
    out = []
    @ds.each_array { |a| out.push(a) }
    assert_equal(expected, out)
  end

  # recode! replaces a field's values with the block result for each case.
  def test_recode
    @ds['age'].type = :scale
    @ds.recode!("age") { |c| c['id'] * 2 }
    expected = [2, 4, 6, 8, 10].to_vector(:scale)
    assert_equal(expected, @ds['age'])
  end

  # A single case can be fetched as a hash or as an array.
  def test_case_as
    assert_equal({'id' => 1, 'name' => 'Alex', 'city' => 'New York', 'age' => 20, 'a1' => 'a,b'},
                 @ds.case_as_hash(0))
    assert_equal([5, 'George', 5, 'Tome', 'a,b,c'], @ds.case_as_array(4))
  end

  # delete_vector removes the field from both the field list and the vectors.
  def test_delete_vector
    @ds.delete_vector('name')
    assert_equal(%w{id age city a1}, @ds.fields)
    assert_equal(%w{a1 age city id}, @ds.vectors.keys.sort)
  end

  # A vector's type can be changed through the dataset accessor.
  def test_change_type
    @ds.col('age').type = :scale
    assert_equal(:scale, @ds.col('age').type)
  end

  # add_vectors_by_split_recode adds numbered indicator vectors plus labels.
  def test_split_by_separator_recode
    @ds.add_vectors_by_split_recode("a1", "_")
    assert_equal(%w{id name age city a1 a1_1 a1_2 a1_3}, @ds.fields)
    assert_equal([1, 0, 1, nil, 1], @ds.col('a1_1').to_a)
    assert_equal([1, 1, 0, nil, 1], @ds.col('a1_2').to_a)
    assert_equal([0, 1, 0, nil, 1], @ds.col('a1_3').to_a)
    assert_equal({'a1_1' => 'a1:a', 'a1_2' => 'a1:b', 'a1_3' => 'a1:c'}, @ds.labels)
  end

  # add_vectors_by_split adds indicator vectors named after each token.
  def test_split_by_separator
    @ds.add_vectors_by_split("a1", "_")
    assert_equal(%w{id name age city a1 a1_a a1_b a1_c}, @ds.fields)
    assert_equal([1, 0, 1, nil, 1], @ds.col('a1_a').to_a)
    assert_equal([1, 1, 0, nil, 1], @ds.col('a1_b').to_a)
    assert_equal([0, 1, 0, nil, 1], @ds.col('a1_c').to_a)
  end

  # add_case accepts an array, a hash, or an array of arrays (several cases).
  def test_add_case
    ds = Statsample::Dataset.new({'a' => [].to_vector, 'b' => [].to_vector, 'c' => [].to_vector})
    ds.add_case([1, 2, 3])
    ds.add_case({'a' => 4, 'b' => 5, 'c' => 6})
    ds.add_case([[7, 8, 9], %w{a b c}])
    assert_equal({'a' => 1, 'b' => 2, 'c' => 3}, ds.case_as_hash(0))
    assert_equal([4, 5, 6], ds.case_as_array(1))
    assert_equal([7, 8, 9], ds.case_as_array(2))
    assert_equal(['a', 'b', 'c'], ds.case_as_array(3))

    # The test calls update_valid_data explicitly after add_case_array and
    # before reading the new case back.
    ds.add_case_array([6, 7, 1])
    ds.update_valid_data
    assert_equal([6, 7, 1], ds.case_as_array(4))
  end

  # A dataset survives a Marshal round trip unchanged.
  def test_marshaling
    ds_marshal = Marshal.load(Marshal.dump(@ds))
    assert_equal(ds_marshal, @ds)
  end

  # Indexing with a Range of field names returns a sub-dataset that shares
  # the very same vector objects.
  def test_range
    v1 = [1, 2, 3, 4].to_vector
    v2 = [5, 6, 7, 8].to_vector
    v3 = [9, 10, 11, 12].to_vector
    ds1 = Statsample::Dataset.new({'v1' => v1, 'v2' => v2, 'v3' => v3}, %w{v3 v2 v1})
    assert_same(v1, ds1['v1'])
    ds2 = ds1["v2".."v1"]
    assert_equal(%w{v2 v1}, ds2.fields)
    assert_same(ds1['v1'], ds2['v1'])
    assert_same(ds1['v2'], ds2['v2'])
  end

  # dup (full and partial) and dup_empty produce equal-but-distinct copies.
  def test_dup
    v1 = [1, 2, 3, 4].to_vector
    v2 = [5, 6, 7, 8].to_vector
    ds1 = Statsample::Dataset.new({'v1' => v1, 'v2' => v2}, %w{v2 v1})

    ds2 = ds1.dup
    assert_equal(ds1, ds2)
    assert_not_same(ds1, ds2)
    assert_equal(ds1['v1'], ds2['v1'])
    assert_not_same(ds1['v1'], ds2['v1'])
    assert_equal(ds1.fields, ds2.fields)
    assert_not_same(ds1.fields, ds2.fields)

    ds1['v1'].type = :scale

    # dup partial
    ds3 = ds1.dup('v1')
    ds_exp = Statsample::Dataset.new({'v1' => v1}, %w{v1})
    assert_equal(ds_exp, ds3)
    assert_not_same(ds_exp, ds3)
    assert_equal(ds3['v1'], ds_exp['v1'])
    assert_not_same(ds3['v1'], ds_exp['v1'])
    assert_equal(ds3.fields, ds_exp.fields)
    assert_not_same(ds3.fields, ds_exp.fields)

    # empty
    ds3 = ds1.dup_empty
    assert_not_equal(ds1, ds3)
    assert_not_equal(ds1['v1'], ds3['v1'])
    assert_equal([], ds3['v1'].data)
    assert_equal([], ds3['v2'].data)
    assert_equal(:scale, ds3['v1'].type)
    # Check the empty copy (ds3); the original re-checked ds2 here, which
    # repeated the assertions already made for the full dup above.
    assert_equal(ds1.fields, ds3.fields)
    assert_not_same(ds1.fields, ds3.fields)
  end

  # from_to returns the inclusive run of fields; an unknown field raises.
  def test_from_to
    assert_equal(%w{name age city}, @ds.from_to("name", "city"))
    assert_raise ArgumentError do
      @ds.from_to("name", "a2")
    end
  end

  # dup_only_valid keeps only the cases without missing values in any vector;
  # Statsample.only_valid does the same for a list of vectors.
  def test_dup_only_valid
    v1 = [1, nil, 3, 4].to_vector(:scale)
    v2 = [5, 6, nil, 8].to_vector(:scale)
    v3 = [9, 10, 11, 12].to_vector(:scale)
    ds1 = Statsample::Dataset.new({'v1' => v1, 'v2' => v2, 'v3' => v3})
    ds2 = ds1.dup_only_valid
    expected = Statsample::Dataset.new({
      'v1' => [1, 4].to_vector(:scale),
      'v2' => [5, 8].to_vector(:scale),
      'v3' => [9, 12].to_vector(:scale)
    })
    assert_equal(expected, ds2)
    assert_equal(expected.vectors.values, Statsample::only_valid(v1, v2, v3))
  end

  # filter returns a dataset with the cases for which the block is true.
  def test_filter
    @ds['age'].type = :scale
    filtered = @ds.filter { |c| c['id'] == 2 || c['id'] == 4 }
    expected = Statsample::Dataset.new({
      'id'   => Statsample::Vector.new([2, 4]),
      'name' => Statsample::Vector.new(%w{Claude Franz}),
      'age'  => Statsample::Vector.new([23, 27], :scale),
      'city' => Statsample::Vector.new(['London', 'Paris']),
      'a1'   => Statsample::Vector.new(['b,c', nil])
    }, ['id', 'name', 'age', 'city', 'a1'])
    assert_equal(expected, filtered)
  end

  # filter_field returns one field's values for the cases the block accepts.
  def test_filter_field
    @ds['age'].type = :scale
    filtered = @ds.filter_field('id') { |c| c['id'] == 2 || c['id'] == 4 }
    expected = [2, 4].to_vector
    assert_equal(expected, filtered)
  end

  # verify reports the failing cases of each test; when the first argument is
  # a field name, that field's value is used as the case identifier.
  def test_verify
    name = %w{r1 r2 r3 r4}.to_vector(:nominal)
    v1 = [1, 2, 3, 4].to_vector(:scale)
    v2 = [4, 3, 2, 1].to_vector(:scale)
    v3 = [10, 20, 30, 40].to_vector(:scale)
    v4 = %w{a b a b}.to_vector(:nominal)
    ds = {'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4, 'id' => name}.to_dataset
    ds.fields = %w{v1 v2 v3 v4 id}

    # Correct
    t1 = create_test("If v4=a, v1 odd") { |r| r['v4'] == 'b' || (r['v4'] == 'a' && r['v1'] % 2 == 1) }
    t2 = create_test("v3=v1*10") { |r| r['v3'] == r['v1'] * 10 }
    # Fail!
    t3 = create_test("v4='b'") { |r| r['v4'] == 'b' }

    exp1 = ["1 [1]: v4='b'", "3 [3]: v4='b'"]
    exp2 = ["1 [r1]: v4='b'", "3 [r3]: v4='b'"]

    res = ds.verify(t3, t1, t2)
    assert_equal(exp1, res)
    res = ds.verify('id', t1, t2, t3)
    assert_equal(exp2, res)
  end
end
|