statsample 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +79 -0
- data/Manifest.txt +56 -0
- data/README.txt +77 -0
- data/Rakefile +22 -0
- data/bin/statsample +2 -0
- data/demo/benchmark.rb +52 -0
- data/demo/chi-square.rb +44 -0
- data/demo/dice.rb +13 -0
- data/demo/distribution_t.rb +95 -0
- data/demo/graph.rb +9 -0
- data/demo/item_analysis.rb +30 -0
- data/demo/mean.rb +81 -0
- data/demo/proportion.rb +57 -0
- data/demo/sample_test.csv +113 -0
- data/demo/strata_proportion.rb +152 -0
- data/demo/stratum.rb +141 -0
- data/lib/spss.rb +131 -0
- data/lib/statsample.rb +216 -0
- data/lib/statsample/anova.rb +74 -0
- data/lib/statsample/bivariate.rb +255 -0
- data/lib/statsample/chidistribution.rb +39 -0
- data/lib/statsample/codification.rb +120 -0
- data/lib/statsample/converters.rb +338 -0
- data/lib/statsample/crosstab.rb +122 -0
- data/lib/statsample/dataset.rb +526 -0
- data/lib/statsample/dominanceanalysis.rb +259 -0
- data/lib/statsample/dominanceanalysis/bootstrap.rb +126 -0
- data/lib/statsample/graph/gdchart.rb +45 -0
- data/lib/statsample/graph/svgboxplot.rb +108 -0
- data/lib/statsample/graph/svggraph.rb +181 -0
- data/lib/statsample/graph/svghistogram.rb +208 -0
- data/lib/statsample/graph/svgscatterplot.rb +111 -0
- data/lib/statsample/htmlreport.rb +232 -0
- data/lib/statsample/multiset.rb +281 -0
- data/lib/statsample/regression.rb +522 -0
- data/lib/statsample/reliability.rb +235 -0
- data/lib/statsample/resample.rb +20 -0
- data/lib/statsample/srs.rb +159 -0
- data/lib/statsample/test.rb +25 -0
- data/lib/statsample/vector.rb +759 -0
- data/test/_test_chart.rb +58 -0
- data/test/test_anova.rb +31 -0
- data/test/test_codification.rb +59 -0
- data/test/test_crosstab.rb +55 -0
- data/test/test_csv.csv +7 -0
- data/test/test_csv.rb +27 -0
- data/test/test_dataset.rb +293 -0
- data/test/test_ggobi.rb +42 -0
- data/test/test_multiset.rb +98 -0
- data/test/test_regression.rb +108 -0
- data/test/test_reliability.rb +32 -0
- data/test/test_resample.rb +23 -0
- data/test/test_srs.rb +14 -0
- data/test/test_statistics.rb +152 -0
- data/test/test_stratified.rb +19 -0
- data/test/test_svg_graph.rb +63 -0
- data/test/test_vector.rb +265 -0
- data/test/test_xls.rb +32 -0
- metadata +158 -0
data/test/_test_chart.rb
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
require File.dirname(__FILE__)+'/../lib/statsample'
|
2
|
+
require 'tempfile'
|
3
|
+
require 'test/unit'
|
4
|
+
require 'statsample/chart/gdchart'
|
5
|
+
# Not included in the default test run, because GDChart emits a lot of warnings!
|
6
|
+
# Smoke tests for the GDChart-backed chart helpers: every test renders one or
# more charts into test/images and asserts that the image file was written.
class StatsampleChartTestCase < Test::Unit::TestCase
  # Names of the GDChart stack-type constants exercised by test_base_chart.
  STACK_TYPES = %w{STACK_DEPTH STACK_SUM STACK_BESIDE STACK_LAYER}.freeze

  # Stores the output directory (the "images" folder next to this file)
  # before handing over to Test::Unit's own initializer.
  def initialize(*args)
    @image_path = File.join(File.dirname(__FILE__), "images")
    super
  end

  # Renders a single-series bar chart, then one two-series bar chart per
  # stack type, through Statsample::Util.chart_gdchart.
  def test_base_chart
    width  = 500
    height = 300
    labels = %w{a b c d e}

    single = image_file("gdchart_base_bar_1.jpg")
    Statsample::Util.chart_gdchart(single, width, height, GDChart::BAR, labels,
                                   {'set_color' => [0xFF3399]}, 1, [10, 40, 30, 20, 40])
    assert(File.exist?(single))

    STACK_TYPES.each do |stack|
      stacked = image_file("gdchart_base_bar_2_#{stack}.jpg")
      options = {
        'set_color'  => [0xFF3399, 0x33FF99, 0xFF99FF, 0xFF3399],
        'stack_type' => GDChart.const_get(stack.intern),
        'title'      => "Bar #{stack}"
      }
      # Two data sets of five values each, passed as one flat array.
      Statsample::Util.chart_gdchart(stacked, width, height, GDChart::BAR, labels,
                                     options, 2, [10, 15, 10, 20, 30, 30, 20, 5, 15, 20])
      assert(File.exist?(stacked))
    end
  end

  # Renders frequency charts and a histogram straight from a vector holding
  # 1000 random integers in 0..9.
  def test_vector
    vector = Array.new(1000) { rand(10) }.to_vector

    bar = image_file("gdchart_bar.jpg")
    vector.gdchart_frequencies(bar, 800, 600, GDChart::BAR, 'title' => 'Bar')
    assert(File.exist?(bar))

    bar3d = image_file("gdchart_bar3d.jpg")
    vector.gdchart_frequencies(bar3d, 300, 100, GDChart::BAR3D, 'title' => 'Bar3D')
    assert(File.exist?(bar3d))

    # NOTE(review): file name and title say "floating bar" but the chart type
    # is GDChart::LINE -- kept as in the original; confirm the intent.
    floating = image_file("gdchart_floatingbar.jpg")
    vector.gdchart_frequencies(floating, 200, 200, GDChart::LINE, 'title' => 'FloatingBar')
    assert(File.exist?(floating))

    # The vector is switched to :scale before the histogram is requested.
    vector.type = :scale
    histogram = image_file("gdchart_histogram.jpg")
    vector.gdchart_histogram(5, histogram, 300, 400, GDChart::BAR, 'title' => 'Histogram')
    assert(File.exist?(histogram))
  end

  private

  # Returns the full path of +name+ inside the image directory, deleting any
  # file left there by a previous run so that the existence assertions can
  # only pass when the chart was really generated by this run.
  def image_file(name)
    path = File.join(@image_path, name)
    File.delete(path) if File.exist?(path)
    path
  end
end
|
data/test/test_anova.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
require File.dirname(__FILE__)+'/../lib/statsample.rb'
|
2
|
+
require 'test/unit'
|
3
|
+
|
4
|
+
# Tests for Statsample::Anova::OneWay using three five-observation groups.
class StatsampleAnovaTestCase < Test::Unit::TestCase
  # Builds the three :scale vectors and the one-way ANOVA over them before
  # delegating to Test::Unit's initializer.
  def initialize(*args)
    @v1, @v2, @v3 = [
      [3, 3, 2, 3, 6],
      [7, 6, 5, 6, 7],
      [9, 8, 9, 7, 8]
    ].map { |observations| observations.to_vector(:scale) }
    @anova = Statsample::Anova::OneWay.new([@v1, @v2, @v3])
    super
  end

  # Checks sums of squares, degrees of freedom and the F statistic; the
  # significance checks only run when HAS_GSL is truthy.
  def test_basic
    # Total, within-group and between-group sums of squares.
    [[72.933, :sst], [14.8, :sswg], [58.133, :ssbg]].each do |expected, statistic|
      assert_in_delta(expected, @anova.send(statistic), 0.001)
    end
    # The total must decompose into within + between.
    assert_in_delta(@anova.sst, @anova.sswg + @anova.ssbg, 0.00001)

    assert_equal(14, @anova.df_total)
    assert_equal(12, @anova.df_wg)
    assert_equal(2, @anova.df_bg)
    assert_in_delta(23.568, @anova.f, 0.001)

    # Second analysis: the first group repeated four times plus the second.
    repeated = Statsample::Anova::OneWay.new([@v1, @v1, @v1, @v1, @v2])
    assert_in_delta(3.960, repeated.f, 0.001)

    unless HAS_GSL
      puts "Skipped OneWay#significance (no GSL)"
      return
    end
    assert(@anova.significance < 0.01)
    assert_in_delta(0.016, repeated.significance, 0.001)
  end
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require File.dirname(__FILE__)+'/../lib/statsample'
|
2
|
+
require 'tempfile'
|
3
|
+
require 'test/unit'
|
4
|
+
|
5
|
+
# Tests for Statsample::Codification: YAML dictionary creation and the
# recoding of a comma-separated multi-value vector.
class StatsampleCodificationTestCase < Test::Unit::TestCase
  # Builds a one-vector dataset ("v1") of comma-separated activity words and
  # the dictionary that maps each word to a one-letter code.
  def initialize(*args)
    v1 = %w{run walk,run walking running sleep sleeping,dreaming sleep,dream}.to_vector
    @dict = {
      'run'   => 'r', 'running'  => 'r',
      'walk'  => 'w', 'walking'  => 'w',
      'sleep' => 's', 'sleeping' => 's',
      'dream' => 'd', 'dreaming' => 'd'
    }
    @ds = {"v1" => v1}.to_dataset
    super
  end

  # create_yaml must reject an empty field list, and must produce the same
  # dictionary skeleton whether it returns the YAML or writes it to an IO.
  def test_create_yaml
    assert_raise(ArgumentError) do
      Statsample::Codification.create_yaml(@ds, [])
    end

    expected_keys_v1 = %w{run walk walking running sleep sleeping dream dreaming}.sort

    yaml_text = Statsample::Codification.create_yaml(@ds, ['v1'])
    parsed = YAML.load(yaml_text)
    assert_equal(['v1'], parsed.keys)
    assert_equal(expected_keys_v1, parsed['v1'].keys.sort)

    tf = Tempfile.new("test_codification")
    begin
      Statsample::Codification.create_yaml(@ds, ['v1'], Statsample::SPLIT_TOKEN, tf)
      # Close and reopen so the YAML is read back from the start of the file.
      tf.close
      tf.open
      parsed = YAML.load(tf)
      assert_equal(['v1'], parsed.keys)
      assert_equal(expected_keys_v1, parsed['v1'].keys.sort)
    ensure
      # Remove the temp file even when one of the assertions above fails.
      tf.close(true)
    end
  end

  # recode_vector returns one array of codes per case; the expectations
  # below show nil cases staying nil and repeated codes appearing only once.
  def test_recodification
    expected = [['r'], ['w', 'r'], ['w'], ['r'], ['s'], ['s', 'd'], ['s', 'd']]
    assert_equal(expected, Statsample::Codification.recode_vector(@ds['v1'], @dict))

    v2 = ['run', 'walk,dreaming', nil, 'walk,dream,dreaming,walking'].to_vector
    expected = [['r'], ['w', 'd'], nil, ['w', 'd']]
    assert_equal(expected, Statsample::Codification.recode_vector(v2, @dict))
  end

  # recode_dataset_simple! must leave "v1" untouched and add "v1_recoded"
  # holding the comma-joined codes.
  def test_recode_dataset_simple
    yaml = YAML.dump({'v1' => @dict})
    Statsample::Codification.recode_dataset_simple!(@ds, yaml)
    expected_vector = ['r', 'w,r', 'w', 'r', 's', 's,d', 's,d'].to_vector
    assert_not_equal(expected_vector, @ds['v1'])
    assert_equal(expected_vector, @ds['v1_recoded'])
  end

  # recode_dataset_split! must add one 0/1 vector per code, named "v1_<code>".
  def test_recode_dataset_split
    yaml = YAML.dump({'v1' => @dict})
    Statsample::Codification.recode_dataset_split!(@ds, yaml)
    expected_flags = {
      'r' => [1, 1, 0, 1, 0, 0, 0],
      'w' => [0, 1, 1, 0, 0, 0, 0],
      's' => [0, 0, 0, 0, 1, 1, 1],
      'd' => [0, 0, 0, 0, 0, 1, 1]
    }
    expected_flags.each do |code, flags|
      assert_equal(flags.to_vector, @ds['v1_' + code], "Error on key #{code}")
    end
  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require File.dirname(__FILE__)+'/../lib/statsample'
|
2
|
+
require 'statsample/crosstab'
|
3
|
+
require 'test/unit'
|
4
|
+
|
5
|
+
# Tests for Statsample::Crosstab built from a 13-case row vector (hair
# colours) and a 13-case column vector (man/woman).
class StatsampleCrosstabTestCase < Test::Unit::TestCase
  # Creates the two fixture vectors and the crosstab shared by the tests.
  def initialize(*args)
    row_values = %w{black blonde black black red black brown black blonde black red black blonde}
    col_values = %w{woman man man woman man man man woman man woman woman man man}
    @v1 = row_values.to_vector
    @v2 = col_values.to_vector
    @ct = Statsample::Crosstab.new(@v1, @v2)
    super
  end

  # The constructor must raise ArgumentError for the two bad inputs below
  # and accept the two fixture vectors.
  def test_crosstab_errors
    # A plain Array (10 items) instead of a vector.
    plain_array = %w{black blonde black black red black brown black blonde black}
    assert_raise(ArgumentError) { Statsample::Crosstab.new(plain_array, @v2) }

    # A vector, but with 11 cases against the 13 of @v2.
    short_vector = %w{black blonde black black red black brown black blonde black black}.to_vector
    assert_raise(ArgumentError) { Statsample::Crosstab.new(short_vector, @v2) }

    assert_nothing_raised { Statsample::Crosstab.new(@v1, @v2) }
  end

  # Row/column names and the marginal totals.
  def test_crosstab_basic
    assert_equal(%w{black blonde brown red}, @ct.rows_names)
    assert_equal(%w{man woman}, @ct.cols_names)

    row_totals = {'black' => 7, 'blonde' => 3, 'red' => 2, 'brown' => 1}
    col_totals = {'man' => 8, 'woman' => 5}
    assert_equal(row_totals, @ct.rows_total)
    assert_equal(col_totals, @ct.cols_total)
  end

  # Cell frequencies: 4 x 2 = 8 cells summing to the 13 cases, plus the
  # per-row / per-column groupings and the matrix form.
  def test_crosstab_frequencies
    cells = @ct.frequencies
    assert_equal(8, cells.size)
    case_count = cells.inject(0) { |acc, (_, count)| acc + count }
    assert_equal(13, case_count)

    by_row = @ct.frequencies_by_row
    assert_equal(4, by_row.size)
    assert_equal(%w{black blonde brown red}, by_row.keys.sort)

    by_col = @ct.frequencies_by_col
    assert_equal(2, by_col.size)
    assert_equal(%w{man woman}, by_col.keys.sort)

    assert_equal(Matrix.rows([[3, 4], [3, 0], [1, 0], [1, 1]]), @ct.to_matrix)
  end

  # Expected frequencies for two 10-case vectors, each split 5/5.
  def test_expected
    rows = %w{1 1 1 1 1 0 0 0 0 0}.to_vector
    cols = %w{0 0 0 0 0 1 1 1 1 1}.to_vector
    crosstab = Statsample::Crosstab.new(rows, cols)
    assert_equal(Matrix[[2.5, 2.5], [2.5, 2.5]], crosstab.matrix_expected)
  end

  # The textual table must mention the column headers.
  def test_to_s
    # NOTE(review): the unescaped "|" makes this an alternation ("man" plus
    # whitespace, OR whitespace plus "woman"); a literal pipe between the two
    # headers may have been intended -- confirm against Crosstab#to_s output.
    assert_match(/man\s+|\s+woman/, @ct.to_s)
  end
end
|
data/test/test_csv.csv
ADDED
data/test/test_csv.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
require File.dirname(__FILE__)+'/../lib/statsample'
|
2
|
+
require 'tmpdir'
|
3
|
+
require 'test/unit'
|
4
|
+
|
5
|
+
# Tests for Statsample::CSV reading and writing, based on the test_csv.csv
# fixture stored next to this file.
class StatsampleCSVTestCase < Test::Unit::TestCase
  # Loads the fixture into @ds before delegating to Test::Unit's initializer.
  def initialize(*args)
    @ds = Statsample::CSV.read(File.join(File.dirname(__FILE__), "test_csv.csv"))
    super
  end

  # The fixture must yield six cases with the five expected fields.
  def test_read
    assert_equal(6, @ds.cases)
    assert_equal(%w{id name age city a1}, @ds.fields)
  end

  # The 'age' value of the last case (index 5) must be read as nil.
  def test_nil
    assert_nil(@ds['age'][5])
  end

  # Round trip: write @ds to a file in the temp dir, read it back and
  # compare case by case.
  def test_write
    filename = File.join(Dir.tmpdir, "test_write.csv")
    begin
      Statsample::CSV.write(@ds, filename)
      reloaded = Statsample::CSV.read(filename)
      # Without this check an empty re-read would pass the loop below vacuously.
      assert_equal(@ds.cases, reloaded.cases)
      index = 0
      reloaded.each_array do |row|
        assert_equal(@ds.case_as_array(index), row)
        index += 1
      end
    ensure
      # Do not leave the generated file behind in the temp directory.
      File.delete(filename) if File.exist?(filename)
    end
  end
end
|
@@ -0,0 +1,293 @@
|
|
1
|
+
require File.dirname(__FILE__)+'/../lib/statsample'
|
2
|
+
require 'test/unit'
|
3
|
+
|
4
|
+
# Tests for Statsample::Dataset: field handling, iteration, derived vectors,
# case manipulation, duplication, filtering and verification.
class StatsampleDatasetTestCase < Test::Unit::TestCase
  # Builds the five-case fixture dataset (@ds) with the fields
  # id, name, age, city and a1 (a comma-separated multi-value field).
  def initialize(*args)
    vectors = {
      'id'   => Statsample::Vector.new([1, 2, 3, 4, 5]),
      'name' => Statsample::Vector.new(%w{Alex Claude Peter Franz George}),
      'age'  => Statsample::Vector.new([20, 23, 25, 27, 5]),
      'city' => Statsample::Vector.new(['New York', 'London', 'London', 'Paris', 'Tome']),
      'a1'   => Statsample::Vector.new(['a,b', 'b,c', 'a', nil, 'a,b,c'])
    }
    @ds = Statsample::Dataset.new(vectors, ['id', 'name', 'age', 'city', 'a1'])
    super
  end

  # Case count and field order of the fixture.
  def test_basic
    assert_equal(5, @ds.cases)
    assert_equal(%w{id name age city a1}, @ds.fields)
  end

  # to_matrix must lay the vectors out as columns.
  def test_matrix
    matrix = Matrix[[1, 2], [3, 4], [5, 6]]
    ds = Statsample::Dataset.new({'v1' => [1, 3, 5].to_vector, 'v2' => [2, 4, 6].to_vector})
    assert_equal(matrix, ds.to_matrix)
  end

  # fields= reorders the fields; when only some fields are given, the test
  # expects the omitted ones (a1, city) to follow the listed ones.
  def test_fields
    @ds.fields = %w{name a1 id age city}
    assert_equal(%w{name a1 id age city}, @ds.fields)
    @ds.fields = %w{id name age}
    assert_equal(%w{id name age a1 city}, @ds.fields)
  end

  # each_vector must yield [name, vector] pairs in field order, and follow a
  # later change of that order.
  def test_each_vector
    a = [1, 2, 3].to_vector
    b = [3, 4, 5].to_vector
    ds = Statsample::Dataset.new({'a' => a, 'b' => b}, ["a", "b"])

    collected = []
    ds.each_vector { |key, vector| collected.push([key, vector]) }
    assert_equal([["a", a], ["b", b]], collected)

    ds.fields = ["b", "a"]
    collected = []
    ds.each_vector { |key, vector| collected.push([key, vector]) }
    assert_equal([["b", b], ["a", a]], collected)
  end

  # Datasets with equal vectors and equal field order are equal; changing
  # the field order of one of them breaks the equality.
  def test_equality
    v1 = [1, 2, 3, 4].to_vector
    v2 = [5, 6, 7, 8].to_vector
    ds1 = Statsample::Dataset.new({'v1' => v1, 'v2' => v2}, %w{v2 v1})
    v3 = [1, 2, 3, 4].to_vector
    v4 = [5, 6, 7, 8].to_vector
    ds2 = Statsample::Dataset.new({'v1' => v3, 'v2' => v4}, %w{v2 v1})
    assert_equal(ds1, ds2)
    ds2.fields = %w{v1 v2}
    assert_not_equal(ds1, ds2)
  end

  # add_vector appends a field; a vector with a different number of cases
  # (7 instead of 5) must raise ArgumentError.
  def test_add_vector
    same_size = Statsample::Vector.new(%w{a b c d e})
    @ds.add_vector('new', same_size)
    assert_equal(%w{id name age city a1 new}, @ds.fields)

    too_long = Statsample::Vector.new(%w{a b c d e f g})
    assert_raise(ArgumentError) do
      @ds.add_vector('new2', too_long)
    end
  end

  # vector_by_calculation builds a vector from a per-row block.
  def test_vector_by_calculation
    a1 = [1, 2, 3, 4, 5, 6, 7].to_vector(:scale)
    a2 = [10, 20, 30, 40, 50, 60, 70].to_vector(:scale)
    a3 = [100, 200, 300, 400, 500, 600, 700].to_vector(:scale)
    ds = {'a1' => a1, 'a2' => a2, 'a3' => a3}.to_dataset
    total = ds.vector_by_calculation { |row| row['a1'] + row['a2'] + row['a3'] }
    expected = [111, 222, 333, 444, 555, 666, 777].to_vector(:scale)
    assert_equal(expected, total)
  end

  # vector_sum over all fields or over a subset; the expectations are nil
  # for every case that has a nil in one of the summed fields.
  def test_vector_sum
    a1 = [1, 2, 3, 4, 5, nil].to_vector(:scale)
    a2 = [10, 10, 20, 20, 20, 30].to_vector(:scale)
    b1 = [nil, 1, 1, 1, 1, 2].to_vector(:scale)
    b2 = [2, 2, 2, nil, 2, 3].to_vector(:scale)
    ds = {'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2}.to_dataset

    total = ds.vector_sum
    sum_a = ds.vector_sum(['a1', 'a2'])
    sum_b = ds.vector_sum(['b1', 'b2'])

    expected_a = [11, 12, 23, 24, 25, nil].to_vector(:scale)
    expected_b = [nil, 3, 3, nil, 3, 5].to_vector(:scale)
    expected_total = [nil, 15, 26, nil, 28, nil].to_vector(:scale)
    assert_equal(expected_a, sum_a)
    assert_equal(expected_b, sum_b)
    assert_equal(expected_total, total)
  end

  # vector_missing_values counts the nil values of each case.
  def test_vector_missing_values
    a1 = [1, nil, 3, 4, 5, nil].to_vector(:scale)
    a2 = [10, nil, 20, 20, 20, 30].to_vector(:scale)
    b1 = [nil, nil, 1, 1, 1, 2].to_vector(:scale)
    b2 = [2, 2, 2, nil, 2, 3].to_vector(:scale)
    c  = [nil, 2, 4, 2, 2, 2].to_vector(:scale)
    ds = {'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c}.to_dataset
    missing_per_case = [2, 3, 0, 1, 0, 1].to_vector(:scale)
    assert_equal(missing_per_case, ds.vector_missing_values)
  end

  # vector_count_characters: the expected figures match the summed length
  # of each case's values written as text (nil counting as 0).
  def test_vector_count_characters
    a1 = [1, "abcde", 3, 4, 5, nil].to_vector(:scale)
    a2 = [10, 20.3, 20, 20, 20, 30].to_vector(:scale)
    b1 = [nil, "343434", 1, 1, 1, 2].to_vector(:scale)
    b2 = [2, 2, 2, nil, 2, 3].to_vector(:scale)
    c  = [nil, 2, "This is a nice example", 2, 2, 2].to_vector(:scale)
    ds = {'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c}.to_dataset
    expected = [4, 17, 27, 5, 6, 5].to_vector(:scale)
    assert_equal(expected, ds.vector_count_characters)
  end

  # vector_mean over all fields (default arguments) and over subsets with a
  # second argument of 1.
  # NOTE(review): the expectations suggest the second argument is the number
  # of missing values tolerated per case -- confirm against Dataset#vector_mean.
  def test_vector_mean
    a1 = [1, 2, 3, 4, 5, nil].to_vector(:scale)
    a2 = [10, 10, 20, 20, 20, 30].to_vector(:scale)
    b1 = [nil, 1, 1, 1, 1, 2].to_vector(:scale)
    b2 = [2, 2, 2, nil, 2, 3].to_vector(:scale)
    c  = [nil, 2, 4, 2, 2, 2].to_vector(:scale)
    ds = {'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c}.to_dataset

    total  = ds.vector_mean
    mean_a = ds.vector_mean(['a1', 'a2'], 1)
    mean_b = ds.vector_mean(['b1', 'b2'], 1)
    # Separate name for the result, so the input vector "c" is not shadowed.
    mean_c = ds.vector_mean(['b1', 'b2', 'c'], 1)

    expected_a = [5.5, 6, 11.5, 12, 12.5, 30].to_vector(:scale)
    expected_b = [2, 1.5, 1.5, 1, 1.5, 2.5].to_vector(:scale)
    expected_c = [nil, 5.0 / 3, 7.0 / 3, 1.5, 5.0 / 3, 7.0 / 3].to_vector(:scale)
    expected_total = [nil, 3.4, 6, nil, 6.0, nil].to_vector(:scale)
    assert_equal(expected_a, mean_a)
    assert_equal(expected_b, mean_b)
    assert_equal(expected_c, mean_c)
    assert_equal(expected_total, total)
  end

  # each_array yields every case as an array of values in field order.
  def test_each_array
    expected = [
      [1, 'Alex', 20, 'New York', 'a,b'],
      [2, 'Claude', 23, 'London', 'b,c'],
      [3, 'Peter', 25, 'London', 'a'],
      [4, 'Franz', 27, 'Paris', nil],
      [5, 'George', 5, 'Tome', 'a,b,c']
    ]
    rows = []
    @ds.each_array { |row| rows.push(row) }
    assert_equal(expected, rows)
  end

  # recode! replaces the values of a field with the result of the block.
  def test_recode
    @ds['age'].type = :scale
    @ds.recode!("age") { |c| c['id'] * 2 }
    expected = [2, 4, 6, 8, 10].to_vector(:scale)
    assert_equal(expected, @ds['age'])
  end

  # A single case can be fetched as a hash or as an array.
  def test_case_as
    first_case = {'id' => 1, 'name' => 'Alex', 'city' => 'New York', 'age' => 20, 'a1' => 'a,b'}
    assert_equal(first_case, @ds.case_as_hash(0))
    assert_equal([5, 'George', 5, 'Tome', 'a,b,c'], @ds.case_as_array(4))
  end

  # delete_vector must remove the field from both the field list and the
  # vectors hash.
  def test_delete_vector
    @ds.delete_vector('name')
    assert_equal(%w{id age city a1}, @ds.fields)
    assert_equal(%w{a1 age city id}, @ds.vectors.keys.sort)
  end

  # The type of a vector can be changed through Dataset#col.
  def test_change_type
    @ds.col('age').type = :scale
    assert_equal(:scale, @ds.col('age').type)
  end

  # add_vectors_by_split_recode adds one numbered 0/1 vector per distinct
  # value of "a1" and labels each new field as "a1:<value>".
  def test_split_by_separator_recode
    @ds.add_vectors_by_split_recode("a1", "_")
    assert_equal(%w{id name age city a1 a1_1 a1_2 a1_3}, @ds.fields)
    assert_equal([1, 0, 1, nil, 1], @ds.col('a1_1').to_a)
    assert_equal([1, 1, 0, nil, 1], @ds.col('a1_2').to_a)
    assert_equal([0, 1, 0, nil, 1], @ds.col('a1_3').to_a)
    assert_equal({'a1_1' => 'a1:a', 'a1_2' => 'a1:b', 'a1_3' => 'a1:c'}, @ds.labels)
  end

  # add_vectors_by_split adds one 0/1 vector per distinct value of "a1",
  # named after the value itself.
  def test_split_by_separator
    @ds.add_vectors_by_split("a1", "_")
    assert_equal(%w{id name age city a1 a1_a a1_b a1_c}, @ds.fields)
    assert_equal([1, 0, 1, nil, 1], @ds.col('a1_a').to_a)
    assert_equal([1, 1, 0, nil, 1], @ds.col('a1_b').to_a)
    assert_equal([0, 1, 0, nil, 1], @ds.col('a1_c').to_a)
  end

  # add_case accepts an array, a hash, or an array of arrays (two cases at
  # once); add_case_array followed by update_valid_data appends one more.
  def test_add_case
    ds = Statsample::Dataset.new({'a' => [].to_vector, 'b' => [].to_vector, 'c' => [].to_vector})
    ds.add_case([1, 2, 3])
    ds.add_case({'a' => 4, 'b' => 5, 'c' => 6})
    ds.add_case([[7, 8, 9], %w{a b c}])
    assert_equal({'a' => 1, 'b' => 2, 'c' => 3}, ds.case_as_hash(0))
    assert_equal([4, 5, 6], ds.case_as_array(1))
    assert_equal([7, 8, 9], ds.case_as_array(2))
    assert_equal(['a', 'b', 'c'], ds.case_as_array(3))

    ds.add_case_array([6, 7, 1])
    ds.update_valid_data
    assert_equal([6, 7, 1], ds.case_as_array(4))
  end

  # A dataset must survive a Marshal round trip unchanged.
  def test_marshaling
    ds_marshal = Marshal.load(Marshal.dump(@ds))
    assert_equal(ds_marshal, @ds)
  end

  # Indexing with a range of field names returns a dataset that shares the
  # very same vector objects.
  def test_range
    v1 = [1, 2, 3, 4].to_vector
    v2 = [5, 6, 7, 8].to_vector
    v3 = [9, 10, 11, 12].to_vector
    ds1 = Statsample::Dataset.new({'v1' => v1, 'v2' => v2, 'v3' => v3}, %w{v3 v2 v1})
    assert_same(v1, ds1['v1'])
    ds2 = ds1["v2".."v1"]
    assert_equal(%w{v2 v1}, ds2.fields)
    assert_same(ds1['v1'], ds2['v1'])
    assert_same(ds1['v2'], ds2['v2'])
  end

  # dup (full and partial) must give equal but distinct objects; dup_empty
  # must keep fields and vector types while dropping the data.
  def test_dup
    v1 = [1, 2, 3, 4].to_vector
    v2 = [5, 6, 7, 8].to_vector
    ds1 = Statsample::Dataset.new({'v1' => v1, 'v2' => v2}, %w{v2 v1})

    # Full duplicate.
    ds2 = ds1.dup
    assert_equal(ds1, ds2)
    assert_not_same(ds1, ds2)
    assert_equal(ds1['v1'], ds2['v1'])
    assert_not_same(ds1['v1'], ds2['v1'])
    assert_equal(ds1.fields, ds2.fields)
    assert_not_same(ds1.fields, ds2.fields)

    ds1['v1'].type = :scale

    # Partial duplicate: only 'v1'.
    ds3 = ds1.dup('v1')
    ds_exp = Statsample::Dataset.new({'v1' => v1}, %w{v1})
    assert_equal(ds_exp, ds3)
    assert_not_same(ds_exp, ds3)
    assert_equal(ds3['v1'], ds_exp['v1'])
    assert_not_same(ds3['v1'], ds_exp['v1'])
    assert_equal(ds3.fields, ds_exp.fields)
    assert_not_same(ds3.fields, ds_exp.fields)

    # Empty duplicate.
    ds3 = ds1.dup_empty
    assert_not_equal(ds1, ds3)
    assert_not_equal(ds1['v1'], ds3['v1'])
    assert_equal([], ds3['v1'].data)
    assert_equal([], ds3['v2'].data)
    assert_equal(:scale, ds3['v1'].type)
    # Check the fields of the empty duplicate itself (the original re-checked
    # ds2 here, which had already been verified above).
    assert_equal(ds1.fields, ds3.fields)
    assert_not_same(ds1.fields, ds3.fields)
  end

  # from_to returns the fields between two names (inclusive) and raises
  # ArgumentError for an unknown field name.
  def test_from_to
    assert_equal(%w{name age city}, @ds.from_to("name", "city"))
    assert_raise(ArgumentError) do
      @ds.from_to("name", "a2")
    end
  end

  # dup_only_valid keeps only the cases without nil in any vector;
  # Statsample.only_valid must return the same filtered vectors.
  def test_dup_only_valid
    v1 = [1, nil, 3, 4].to_vector(:scale)
    v2 = [5, 6, nil, 8].to_vector(:scale)
    v3 = [9, 10, 11, 12].to_vector(:scale)
    ds1 = Statsample::Dataset.new({'v1' => v1, 'v2' => v2, 'v3' => v3})
    ds2 = ds1.dup_only_valid
    expected = Statsample::Dataset.new({
      'v1' => [1, 4].to_vector(:scale),
      'v2' => [5, 8].to_vector(:scale),
      'v3' => [9, 12].to_vector(:scale)
    })
    assert_equal(expected, ds2)
    assert_equal(expected.vectors.values, Statsample::only_valid(v1, v2, v3))
  end

  # filter keeps the cases for which the block is true, preserving vector
  # types and field order.
  def test_filter
    @ds['age'].type = :scale
    filtered = @ds.filter { |c| c['id'] == 2 || c['id'] == 4 }
    expected = Statsample::Dataset.new({
      'id'   => Statsample::Vector.new([2, 4]),
      'name' => Statsample::Vector.new(%w{Claude Franz}),
      'age'  => Statsample::Vector.new([23, 27], :scale),
      'city' => Statsample::Vector.new(['London', 'Paris']),
      'a1'   => Statsample::Vector.new(['b,c', nil])
    }, ['id', 'name', 'age', 'city', 'a1'])
    assert_equal(expected, filtered)
  end

  # filter_field returns only the named field of the matching cases.
  def test_filter_field
    @ds['age'].type = :scale
    filtered = @ds.filter_field('id') { |c| c['id'] == 2 || c['id'] == 4 }
    expected = [2, 4].to_vector
    assert_equal(expected, filtered)
  end

  # verify runs the given tests on every case and reports the failures,
  # identifying each case by its number or, when a field name is passed
  # first, by the value of that field.
  def test_verify
    name = %w{r1 r2 r3 r4}.to_vector(:nominal)
    v1 = [1, 2, 3, 4].to_vector(:scale)
    v2 = [4, 3, 2, 1].to_vector(:scale)
    v3 = [10, 20, 30, 40].to_vector(:scale)
    v4 = %w{a b a b}.to_vector(:nominal)
    ds = {'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4, 'id' => name}.to_dataset
    ds.fields = %w{v1 v2 v3 v4 id}

    # Tests that every case satisfies.
    t1 = create_test("If v4=a, v1 odd") { |r| r['v4'] == 'b' || (r['v4'] == 'a' && r['v1'] % 2 == 1) }
    t2 = create_test("v3=v1*10") { |r| r['v3'] == r['v1'] * 10 }
    # Test that fails for the cases where v4 is 'a' (the first and third).
    t3 = create_test("v4='b'") { |r| r['v4'] == 'b' }

    exp1 = ["1 [1]: v4='b'", "3 [3]: v4='b'"]
    exp2 = ["1 [r1]: v4='b'", "3 [r3]: v4='b'"]

    res = ds.verify(t3, t1, t2)
    assert_equal(exp1, res)
    res = ds.verify('id', t1, t2, t3)
    assert_equal(exp2, res)
  end
end
|