statsample 0.5.0 → 0.5.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +11 -0
- data/Manifest.txt +7 -0
- data/README.txt +3 -3
- data/data/repeated_fields.csv +7 -0
- data/data/tetmat_matrix.txt +5 -0
- data/data/tetmat_test.txt +1001 -0
- data/demo/spss_matrix.rb +3 -0
- data/lib/spss.rb +1 -1
- data/lib/statistics2.rb +1 -1
- data/lib/statsample.rb +30 -1
- data/lib/statsample/anova.rb +62 -66
- data/lib/statsample/bivariate.rb +273 -281
- data/lib/statsample/bivariate/tetrachoric.rb +418 -0
- data/lib/statsample/codification.rb +15 -15
- data/lib/statsample/combination.rb +108 -106
- data/lib/statsample/converter/csv18.rb +52 -52
- data/lib/statsample/converter/csv19.rb +45 -48
- data/lib/statsample/converter/spss.rb +47 -0
- data/lib/statsample/converters.rb +74 -77
- data/lib/statsample/crosstab.rb +21 -17
- data/lib/statsample/dataset.rb +595 -543
- data/lib/statsample/dominanceanalysis.rb +7 -10
- data/lib/statsample/htmlreport.rb +23 -0
- data/lib/statsample/regression/multiple/baseengine.rb +59 -59
- data/lib/statsample/regression/multiple/gslengine.rb +1 -1
- data/lib/statsample/reliability.rb +165 -145
- data/lib/statsample/vector.rb +16 -2
- data/test/test_anova.rb +16 -16
- data/test/test_bivariate.rb +146 -0
- data/test/test_csv.rb +6 -0
- data/test/test_dataset.rb +49 -5
- data/test/test_statistics.rb +6 -90
- data/test/test_vector.rb +27 -10
- metadata +10 -4
- data/test/test_r.rb +0 -9
- data/test/test_stata.rb +0 -11
data/lib/statsample/vector.rb
CHANGED
@@ -182,6 +182,22 @@ module Statsample
|
|
182
182
|
}
|
183
183
|
set_valid_data
|
184
184
|
end
|
185
|
+
# Dichotomizes the vector into 0 and 1 values.
#
# Every value greater than +low+ becomes 1 and every value lower than or
# equal to +low+ becomes 0; nil entries are kept as nil. When +low+ is not
# given, the lowest factor of the vector is used as the cut-off, so only
# the minimum value maps to 0.
#
# Returns the recoded values converted with +to_scale+.
def dichotomize(low=nil)
  # Factors are only needed (and only computed) when no cut-off was supplied.
  low ||= factors.min
  recoded = @data_with_nils.map do |x|
    if x.nil?
      nil
    elsif x > low
      1
    else
      0
    end
  end
  recoded.to_scale
end
|
185
201
|
# Iterate on each item.
|
186
202
|
# Equivalent to
|
187
203
|
# @data.each{|x| yield x}
|
@@ -190,7 +206,6 @@ module Statsample
|
|
190
206
|
end
|
191
207
|
|
192
208
|
# Iterate on each item, retrieving index
|
193
|
-
|
194
209
|
def each_index
|
195
210
|
(0...@data.size).each {|i|
|
196
211
|
yield(i)
|
@@ -520,7 +535,6 @@ module Statsample
|
|
520
535
|
# Retrieves unique values for data.
|
521
536
|
def factors
|
522
537
|
if @type==:scale
|
523
|
-
|
524
538
|
@scale_data.uniq.sort
|
525
539
|
else
|
526
540
|
@valid_data.uniq.sort
|
data/test/test_anova.rb
CHANGED
@@ -4,24 +4,24 @@ require 'test/unit'
|
|
4
4
|
|
5
5
|
# Exercises Statsample::Anova::OneWay on three small scale vectors.
class StatsampleAnovaTestCase < Test::Unit::TestCase
  # Builds the shared fixtures before each test. They are created here rather
  # than in an overridden +initialize+ so the test case constructor is left
  # untouched.
  def setup
    @v1 = [3,3,2,3,6].to_vector(:scale)
    @v2 = [7,6,5,6,7].to_vector(:scale)
    @v3 = [9,8,9,7,8].to_vector(:scale)
    @anova = Statsample::Anova::OneWay.new([@v1,@v2,@v3])
  end

  # Checks sums of squares, degrees of freedom, F and significance.
  def test_basic
    assert_in_delta(72.933, @anova.sst, 0.001)
    assert_in_delta(14.8, @anova.sswg, 0.001)
    assert_in_delta(58.133, @anova.ssbg, 0.001)
    # The total sum of squares must equal within-group plus between-group.
    assert_in_delta(@anova.sst, @anova.sswg+@anova.ssbg, 0.00001)
    assert_equal(14, @anova.df_total)
    assert_equal(12, @anova.df_wg)
    assert_equal(2, @anova.df_bg)
    assert_in_delta(23.568, @anova.f, 0.001)
    anova2 = Statsample::Anova::OneWay.new([@v1,@v1,@v1,@v1,@v2])
    assert_in_delta(3.960, anova2.f, 0.001)
    assert(@anova.significance<0.01)
    assert_in_delta(0.016, anova2.significance, 0.001)
  end
end
|
@@ -0,0 +1,146 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
2
|
+
require 'statsample'
|
3
|
+
require 'test/unit'
|
4
|
+
# Tests for the bivariate statistics in Statsample::Bivariate.
class StatsampleBivariateTestCase < Test::Unit::TestCase
  def test_sum_of_codeviated
    v1 = [1,2,3,4,5,6].to_vector(:scale)
    v2 = [6,2,4,10,12,8].to_vector(:scale)
    assert_equal(23.0, Statsample::Bivariate.sum_of_codeviated(v1,v2))
  end

  def test_pearson
    v1 = [6,5,4,7,8,4,3,2].to_vector(:scale)
    v2 = [2,3,7,8,6,4,3,2].to_vector(:scale)
    assert_in_delta(0.525, Statsample::Bivariate.pearson(v1,v2), 0.001)
    # Same pairs as above plus cases where one of the two values is nil;
    # the expected coefficient is unchanged.
    v3 = [6,2, 1000,1000,5,4,7,8,4,3,2,nil].to_vector(:scale)
    v4 = [2,nil,nil,nil, 3,7,8,6,4,3,2,500].to_vector(:scale)
    assert_in_delta(0.525, Statsample::Bivariate.pearson(v3,v4), 0.001)
  end

  # Compares the computed tetrachoric correlation matrix with the expected
  # matrix stored in data/tetmat_matrix.txt, cell by cell.
  def test_tetrachoric_matrix
    ds = Statsample::PlainText.read(File.dirname(__FILE__)+"/../data/tetmat_test.txt", %w{a b c d e})
    tcm_obs = Statsample::Bivariate.tetrachoric_correlation_matrix(ds)
    tcm_exp = Statsample::PlainText.read(File.dirname(__FILE__)+"/../data/tetmat_matrix.txt", %w{a b c d e}).to_matrix
    tcm_obs.row_size.times do |i|
      # column_size only returns a number; iterate with #times so the
      # assertion below actually runs for every column.
      tcm_obs.column_size.times do |j|
        assert_in_delta(tcm_exp[i,j], tcm_obs[i,j], 0.00001)
      end
    end
  end

  def test_tetrachoric
    # Each of these 2x2 tables is expected to raise RuntimeError.
    [[0,0,0,0], [10,10,0,0], [10,0,10,0]].each do |a,b,c,d|
      assert_raise RuntimeError do
        Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
      end
    end

    a,b,c,d = 10,0,0,10
    tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
    assert_equal(1, tc.r)
    assert_equal(0, tc.se)

    a,b,c,d = 0,10,10,0
    tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
    assert_equal(-1, tc.r)
    assert_equal(0, tc.se)

    a,b,c,d = 30,40,70,20
    tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
    assert_in_delta(-0.53980, tc.r, 0.0001)
    assert_in_delta(0.09940, tc.se, 0.0001)
    assert_in_delta(0.31864, tc.threshold_x, 0.0001)
    assert_in_delta(-0.15731, tc.threshold_y, 0.0001)

    x = %w{a a a a b b b a b b a a b b}.to_vector
    y = %w{0 0 1 1 0 0 1 1 1 1 0 0 1 1}.to_vector
    # crosstab
    #     0    1
    # a   4    3
    # b   2    5
    a,b,c,d = 4,3,2,5
    tc1 = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
    tc2 = Statsample::Bivariate::Tetrachoric.new_with_vectors(x,y)
    assert_equal(tc1.r, tc2.r)
    assert_equal(tc1.se, tc2.se)
  end

  def test_matrix_correlation
    v1 = [6,5,4,7,8,4,3,2].to_vector(:scale)
    v2 = [2,3,7,8,6,4,3,2].to_vector(:scale)
    v3 = [6,2, 1000,1000,5,4,7,8].to_vector(:scale)
    v4 = [2,nil,nil,nil, 3,7,8,6].to_vector(:scale)
    ds = {'v1'=>v1,'v2'=>v2,'v3'=>v3,'v4'=>v4}.to_dataset
    # Expected matrix: the pairwise Pearson coefficient of every vector
    # against every vector, in v1..v4 order for both rows and columns.
    vectors = [v1, v2, v3, v4]
    rows = vectors.map do |row_v|
      vectors.map { |col_v| Statsample::Bivariate.pearson(row_v, col_v) }
    end
    expected = Matrix[*rows]
    obt = Statsample::Bivariate.correlation_matrix(ds)
    # Compared cell by cell within a tolerance instead of with assert_equal.
    expected.row_size.times do |i|
      expected.column_size.times do |j|
        assert_in_delta(expected[i,j], obt[i,j], 0.0001, "#{expected[i,j].class}!=#{obt[i,j].class} ")
      end
    end
  end

  def test_prop_pearson
    assert_in_delta(0.42, Statsample::Bivariate.prop_pearson(Statsample::Bivariate.t_r(0.084,94), 94), 0.01)
    assert_in_delta(0.65, Statsample::Bivariate.prop_pearson(Statsample::Bivariate.t_r(0.046,95), 95), 0.01)

    # Strong positive correlation: significant two-tailed and on the right tail.
    r = 0.9
    n = 100
    t = Statsample::Bivariate.t_r(r,n)
    assert(Statsample::Bivariate.prop_pearson(t,n,:both)<0.05)
    assert(Statsample::Bivariate.prop_pearson(t,n,:right)<0.05)
    assert(Statsample::Bivariate.prop_pearson(t,n,:left)>0.05)

    # Strong negative correlation: significant two-tailed and on the left tail.
    r = -0.9
    n = 100
    t = Statsample::Bivariate.t_r(r,n)
    assert(Statsample::Bivariate.prop_pearson(t,n,:both)<0.05)
    assert(Statsample::Bivariate.prop_pearson(t,n,:right)>0.05)
    assert(Statsample::Bivariate.prop_pearson(t,n,:left)<0.05)
  end

  # Only runs when HAS_GSL is truthy: checks that +covariance+ and
  # +covariance_slow+ agree.
  def test_covariance
    if HAS_GSL
      v1 = [6,5,4,7,8,4,3,2].to_vector(:scale)
      v2 = [2,3,7,8,6,4,3,2].to_vector(:scale)
      assert_in_delta(Statsample::Bivariate.covariance(v1,v2), Statsample::Bivariate.covariance_slow(v1,v2), 0.001)
    end
  end

  def test_spearman
    v1 = [86,97,99,100,101,103,106,110,112,113].to_vector(:scale)
    v2 = [0,20,28,27,50,29,7,17,6,12].to_vector(:scale)
    assert_in_delta(-0.175758, Statsample::Bivariate.spearman(v1,v2), 0.0001)
  end

  def test_point_biserial
    c = [1,3,5,6,7,100,200,300,400,300].to_vector(:scale)
    d = [1,1,1,1,1,0,0,0,0,0].to_vector(:scale)
    # With the arguments in this order a TypeError is expected.
    assert_raise TypeError do
      Statsample::Bivariate.point_biserial(c,d)
    end
    assert_in_delta(Statsample::Bivariate.point_biserial(d,c), Statsample::Bivariate.pearson(d,c), 0.0001)
  end

  def test_tau
    v1 = [1,2,3,4,5,6,7,8,9,10,11].to_vector(:ordinal)
    v2 = [1,3,4,5,7,8,2,9,10,6,11].to_vector(:ordinal)
    assert_in_delta(0.6727, Statsample::Bivariate.tau_a(v1,v2), 0.001)
    assert_in_delta(0.6727, Statsample::Bivariate.tau_b((Statsample::Crosstab.new(v1,v2).to_matrix)), 0.001)
    v1 = [12,14,14,17,19,19,19,19,19,20,21,21,21,21,21,22,23,24,24,24,26,26,27].to_vector(:ordinal)
    v2 = [11,4,4,2,0,0,0,0,0,0,4,0,4,0,0,0,0,4,0,0,0,0,0].to_vector(:ordinal)
    assert_in_delta(-0.376201540231705, Statsample::Bivariate.tau_b(Statsample::Crosstab.new(v1,v2).to_matrix), 0.001)
  end

  def test_gamma
    m = Matrix[[10,5,2],[10,15,20]]
    assert_in_delta(0.636, Statsample::Bivariate.gamma(m), 0.001)
    m2 = Matrix[[15,12,6,5],[12,8,10,8],[4,6,9,10]]
    assert_in_delta(0.349, Statsample::Bivariate.gamma(m2), 0.001)
  end
end
|
data/test/test_csv.rb
CHANGED
@@ -26,6 +26,12 @@ class StatsampleCSVTestCase < Test::Unit::TestCase
|
|
26
26
|
# The sixth entry of the 'age' vector is expected to be nil.
def test_nil
  assert_nil(@ds['age'][5])
end
|
29
|
+
# Loads data/repeated_fields.csv and checks both the resulting field names
# and the contents of the 'age_2' column.
def test_repeated
  csv_path = File.dirname(__FILE__)+"/../data/repeated_fields.csv"
  dataset = Statsample::CSV.read(csv_path)
  expected_fields = %w{id name_1 age_1 city a1 name_2 age_2}
  assert_equal(expected_fields, dataset.fields)
  expected_age = [3,4,5,6,nil,8].to_vector(:scale)
  assert_equal(expected_age, dataset['age_2'])
end
|
29
35
|
def test_write
|
30
36
|
filename=Dir::tmpdir+"/test_write.csv"
|
31
37
|
Statsample::CSV.write(@ds,filename)
|
data/test/test_dataset.rb
CHANGED
@@ -13,10 +13,10 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
|
|
13
13
|
assert_equal(%w{id name age city a1}, @ds.fields)
|
14
14
|
end
|
15
15
|
# Saves the fixture dataset to a file in the temporary directory, loads it
# back and expects the loaded object to equal the original.
def test_saveload
  path = Dir::tmpdir+"/dataset.ds"
  @ds.save(path)
  loaded = Statsample.load(path)
  assert_equal(@ds, loaded)
end
|
21
21
|
|
22
22
|
def test_matrix
|
@@ -31,6 +31,24 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
|
|
31
31
|
@ds.fields=%w{id name age}
|
32
32
|
assert_equal(%w{id name age a1 city}, @ds.fields)
|
33
33
|
end
|
34
|
+
# Checks Dataset#merge: field order follows the receiver first, and a field
# name present in both datasets is expected to be suffixed with _1 / _2.
def test_merge
  vec_a = [1,2,3].to_scale
  vec_b = [3,4,5].to_vector
  vec_c = [4,5,6].to_scale
  vec_d = [7,8,9].to_vector
  vec_e = [10,20,30].to_vector
  ds1 = {'a'=>vec_a,'b'=>vec_b}.to_dataset
  ds2 = {'c'=>vec_c,'d'=>vec_d}.to_dataset

  expected = {'a'=>vec_a,'b'=>vec_b,'c'=>vec_c,'d'=>vec_d}.to_dataset
  assert_equal(expected, ds1.merge(ds2))

  # Merging in the opposite direction puts ds2's fields first.
  expected.fields = %w{c d a b}
  assert_equal(expected, ds2.merge(ds1))

  # 'a' exists on both sides here.
  ds3 = {'a'=>vec_e}.to_dataset
  expected = {'a_1'=>vec_a,'b'=>vec_b,'a_2'=>vec_e}.to_dataset
  expected.fields = %w{a_1 b a_2}
  assert_equal(expected, ds1.merge(ds3))
end
|
34
52
|
def test_each_vector
|
35
53
|
a=[1,2,3].to_vector
|
36
54
|
b=[3,4,5].to_vector
|
@@ -185,7 +203,18 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
|
|
185
203
|
assert_equal([1,1,0,nil,1],@ds.col('a1_b').to_a)
|
186
204
|
assert_equal([0,1,0,nil,1],@ds.col('a1_c').to_a)
|
187
205
|
end
|
188
|
-
|
206
|
+
# Checks +median+ and +percentil(25)+ on the ranges 1..100, 1..99 and 1..50.
def test_percentiles
  hundred = (1..100).to_a.to_scale
  assert_equal(50.5, hundred.median)
  assert_equal(25.5, hundred.percentil(25))

  ninety_nine = (1..99).to_a.to_scale
  assert_equal(50, ninety_nine.median)
  assert_equal(25, ninety_nine.percentil(25))

  fifty = (1..50).to_a.to_scale
  assert_equal(25.5, fifty.median)
  assert_equal(13, fifty.percentil(25))
end
|
189
218
|
def test_add_case
|
190
219
|
ds=Statsample::Dataset.new({'a'=>[].to_vector, 'b'=>[].to_vector, 'c'=>[].to_vector})
|
191
220
|
ds.add_case([1,2,3])
|
@@ -326,5 +355,20 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
|
|
326
355
|
assert_equal(vsum,ds.compute("v1+v2+v3"))
|
327
356
|
assert_equal(vmult,ds.compute("v1*v2"))
|
328
357
|
|
358
|
+
end
|
359
|
+
# Builds a dataset with Dataset.crosstab_by_asignation from three vectors and
# checks the types of the resulting vectors and the whole dataset contents.
def test_crosstab_with_asignation
  rows   = %w{a a a b b b c c c}.to_vector
  cols   = %w{a b c a b c a b c}.to_vector
  values = %w{0 1 0 0 1 1 0 0 1}.to_scale
  ds = Statsample::Dataset.crosstab_by_asignation(rows, cols, values)

  assert_equal(:nominal, ds['_id'].type)
  assert_equal(:scale, ds['a'].type)
  assert_equal(:scale, ds['b'].type)

  expected = {
    '_id' => %w{a b c}.to_vector,
    'a'   => %w{0 0 0}.to_scale,
    'b'   => %w{1 1 0}.to_scale,
    'c'   => %w{0 1 1}.to_scale
  }.to_dataset
  assert_equal(ds, expected)
end
|
330
374
|
end
|
data/test/test_statistics.rb
CHANGED
@@ -6,6 +6,11 @@ class StatsampleStatisicsTestCase < Test::Unit::TestCase
|
|
6
6
|
def initialize(*args)
|
7
7
|
super
|
8
8
|
end
|
9
|
+
# Repeated entries are expected to get a numeric suffix (_1, _2, ...), while
# entries that occur once are left unchanged.
def test_recode_repeated
  input = %w{a b c c d d d e}
  expected = ["a","b","c_1","c_2","d_1","d_2","d_3","e"]
  assert_equal(expected, input.recode_repeated)
end
|
9
14
|
def test_is_number
|
10
15
|
assert("10".is_number?)
|
11
16
|
assert("-10".is_number?)
|
@@ -30,96 +35,7 @@ class StatsampleStatisicsTestCase < Test::Unit::TestCase
|
|
30
35
|
chi=Statsample::Test.chi_square(real,expected)
|
31
36
|
assert_in_delta(32.53,chi,0.1)
|
32
37
|
end
|
33
|
-
|
34
|
-
v1=[1,2,3,4,5,6].to_vector(:scale)
|
35
|
-
v2=[6,2,4,10,12,8].to_vector(:scale)
|
36
|
-
assert_equal(23.0, Statsample::Bivariate.sum_of_codeviated(v1,v2))
|
37
|
-
end
|
38
|
-
def test_pearson
|
39
|
-
v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
|
40
|
-
v2=[2,3,7,8,6,4,3,2].to_vector(:scale)
|
41
|
-
assert_in_delta(0.525,Statsample::Bivariate.pearson(v1,v2), 0.001)
|
42
|
-
v3=[6,2, 1000,1000,5,4,7,8,4,3,2,nil].to_vector(:scale)
|
43
|
-
v4=[2,nil,nil,nil, 3,7,8,6,4,3,2,500].to_vector(:scale)
|
44
|
-
assert_in_delta(0.525,Statsample::Bivariate.pearson(v3,v4),0.001)
|
45
|
-
end
|
46
|
-
def test_matrix_correlation
|
47
|
-
v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
|
48
|
-
v2=[2,3,7,8,6,4,3,2].to_vector(:scale)
|
49
|
-
v3=[6,2, 1000,1000,5,4,7,8].to_vector(:scale)
|
50
|
-
v4=[2,nil,nil,nil, 3,7,8,6].to_vector(:scale)
|
51
|
-
ds={'v1'=>v1,'v2'=>v2,'v3'=>v3,'v4'=>v4}.to_dataset
|
52
|
-
c=Proc.new {|n1,n2|Statsample::Bivariate.pearson(n1,n2)}
|
53
|
-
expected=Matrix[ [c.call(v1,v1),c.call(v1,v2),c.call(v1,v3),c.call(v1,v4)], [c.call(v2,v1),c.call(v2,v2),c.call(v2,v3),c.call(v2,v4)], [c.call(v3,v1),c.call(v3,v2),c.call(v3,v3),c.call(v3,v4)],
|
54
|
-
[c.call(v4,v1),c.call(v4,v2),c.call(v4,v3),c.call(v4,v4)]
|
55
|
-
]
|
56
|
-
obt=Statsample::Bivariate.correlation_matrix(ds)
|
57
|
-
for i in 0...expected.row_size
|
58
|
-
for j in 0...expected.column_size
|
59
|
-
#puts expected[i,j].inspect
|
60
|
-
#puts obt[i,j].inspect
|
61
|
-
assert_in_delta(expected[i,j], obt[i,j],0.0001,"#{expected[i,j].class}!=#{obt[i,j].class} ")
|
62
|
-
end
|
63
|
-
end
|
64
|
-
#assert_equal(expected,obt)
|
65
|
-
end
|
66
|
-
def test_prop_pearson
|
67
|
-
assert_in_delta(0.42, Statsample::Bivariate.prop_pearson(Statsample::Bivariate.t_r(0.084,94), 94),0.01)
|
68
|
-
assert_in_delta(0.65, Statsample::Bivariate.prop_pearson(Statsample::Bivariate.t_r(0.046,95), 95),0.01)
|
69
|
-
r=0.9
|
70
|
-
n=100
|
71
|
-
t=Statsample::Bivariate.t_r(r,n)
|
72
|
-
assert(Statsample::Bivariate.prop_pearson(t,n,:both)<0.05)
|
73
|
-
assert(Statsample::Bivariate.prop_pearson(t,n,:right)<0.05)
|
74
|
-
assert(Statsample::Bivariate.prop_pearson(t,n,:left)>0.05)
|
75
|
-
|
76
|
-
r=-0.9
|
77
|
-
n=100
|
78
|
-
t=Statsample::Bivariate.t_r(r,n)
|
79
|
-
assert(Statsample::Bivariate.prop_pearson(t,n,:both)<0.05)
|
80
|
-
assert(Statsample::Bivariate.prop_pearson(t,n,:right)>0.05)
|
81
|
-
assert(Statsample::Bivariate.prop_pearson(t,n,:left)<0.05)
|
82
|
-
end
|
83
|
-
def test_covariance
|
84
|
-
if HAS_GSL
|
85
|
-
v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
|
86
|
-
v2=[2,3,7,8,6,4,3,2].to_vector(:scale)
|
87
|
-
assert_in_delta(Statsample::Bivariate.covariance(v1,v2), Statsample::Bivariate.covariance_slow(v1,v2), 0.001)
|
88
|
-
|
89
|
-
end
|
90
|
-
end
|
91
|
-
|
92
|
-
def test_spearman
|
93
|
-
v1=[86,97,99,100,101,103,106,110,112,113].to_vector(:scale)
|
94
|
-
v2=[0,20,28,27,50,29,7,17,6,12].to_vector(:scale)
|
95
|
-
assert_in_delta(-0.175758,Statsample::Bivariate.spearman(v1,v2),0.0001)
|
96
|
-
|
97
|
-
end
|
98
|
-
def test_point_biserial
|
99
|
-
c=[1,3,5,6,7,100,200,300,400,300].to_vector(:scale)
|
100
|
-
d=[1,1,1,1,1,0,0,0,0,0].to_vector(:scale)
|
101
|
-
assert_raise TypeError do
|
102
|
-
Statsample::Bivariate.point_biserial(c,d)
|
103
|
-
end
|
104
|
-
assert_in_delta(Statsample::Bivariate.point_biserial(d,c), Statsample::Bivariate.pearson(d,c), 0.0001)
|
105
|
-
end
|
106
|
-
def test_tau
|
107
|
-
v1=[1,2,3,4,5,6,7,8,9,10,11].to_vector(:ordinal)
|
108
|
-
v2=[1,3,4,5,7,8,2,9,10,6,11].to_vector(:ordinal)
|
109
|
-
assert_in_delta(0.6727,Statsample::Bivariate.tau_a(v1,v2),0.001)
|
110
|
-
assert_in_delta(0.6727,Statsample::Bivariate.tau_b((Statsample::Crosstab.new(v1,v2).to_matrix)),0.001)
|
111
|
-
v1=[12,14,14,17,19,19,19,19,19,20,21,21,21,21,21,22,23,24,24,24,26,26,27].to_vector(:ordinal)
|
112
|
-
v2=[11,4,4,2,0,0,0,0,0,0,4,0,4,0,0,0,0,4,0,0,0,0,0].to_vector(:ordinal)
|
113
|
-
assert_in_delta(-0.376201540231705, Statsample::Bivariate.tau_b(Statsample::Crosstab.new(v1,v2).to_matrix),0.001)
|
114
|
-
end
|
115
|
-
def test_gamma
|
116
|
-
m=Matrix[[10,5,2],[10,15,20]]
|
117
|
-
assert_in_delta(0.636,Statsample::Bivariate.gamma(m),0.001)
|
118
|
-
m2=Matrix[[15,12,6,5],[12,8,10,8],[4,6,9,10]]
|
119
|
-
assert_in_delta(0.349,Statsample::Bivariate.gamma(m2),0.001)
|
120
|
-
|
121
|
-
|
122
|
-
end
|
38
|
+
|
123
39
|
def test_estimation_mean
|
124
40
|
v=([42]*23+[41]*4+[36]*1+[32]*1+[29]*1+[27]*2+[23]*1+[19]*1+[16]*2+[15]*2+[14,11,10,9,7]+ [6]*3+[5]*2+[4,3]).to_vector(:scale)
|
125
41
|
assert_equal(50,v.size)
|
data/test/test_vector.rb
CHANGED
@@ -2,7 +2,9 @@ $:.unshift(File.dirname(__FILE__)+'/../lib/')
|
|
2
2
|
require 'statsample'
|
3
3
|
require 'test/unit'
|
4
4
|
require 'tmpdir'
|
5
|
-
class
|
5
|
+
class TestStatsample
|
6
|
+
end
|
7
|
+
class TestStatsample::TestVector < Test::Unit::TestCase
|
6
8
|
|
7
9
|
def setup
|
8
10
|
@c = Statsample::Vector.new([5,5,5,5,5,6,6,7,8,9,10,1,2,3,4,nil,-99,-99], :nominal)
|
@@ -121,15 +123,15 @@ class StatsampleVectorTestCase < Test::Unit::TestCase
|
|
121
123
|
end
|
122
124
|
end
|
123
125
|
# Checks element access, frequencies, proportions, factors, mode and the
# number of valid cases of the @c fixture vector.
def test_nominal
  expected_freq = { 1=>1,2=>1,3=>1,4=>1,5=>5,6=>2,7=>1,8=>1, 9=>1,10=>1}
  expected_prop = { 1 => 1.quo(15) ,2=>1.quo(15), 3=>1.quo(15),4=>1.quo(15),5=>5.quo(15),6=>2.quo(15),7=>1.quo(15), 8=>1.quo(15), 9=>1.quo(15),10=>1.quo(15)}

  assert_equal(@c[1], 5)
  assert_equal(expected_freq, @c.frequencies)
  assert_equal(expected_freq, @c._frequencies)
  assert_equal(expected_prop, @c.proportions)
  assert_equal(@c.proportion, 1.quo(15))
  assert_equal(@c.proportion(2), 1.quo(15))
  assert_equal([1,2,3,4,5,6,7,8,9,10], @c.factors.sort)
  assert_equal(@c.mode, 5)
  assert_equal(@c.n_valid, 15)
end
|
134
136
|
def test_equality
|
135
137
|
v1=[1,2,3].to_vector
|
@@ -328,4 +330,19 @@ class StatsampleVectorTestCase < Test::Unit::TestCase
|
|
328
330
|
expected=[2,2,2,4.5,4.5,6,7.5,7.5,10,10,10].to_vector(:ordinal)
|
329
331
|
assert_equal(expected,a.ranked)
|
330
332
|
end
|
333
|
+
# Checks Vector#dichotomize with the default cut-off, with an explicit
# cut-off and with string data.
def test_dichotomize
  # Default cut-off, nil preserved.
  input = [0,0,0,1,2,3,nil].to_vector
  expected = [0,0,0,1,1,1,nil].to_scale
  assert_equal(expected, input.dichotomize)

  # Default cut-off, no nils.
  input = [1,1,1,2,2,2,3].to_vector
  expected = [0,0,0,1,1,1,1].to_scale
  assert_equal(expected, input.dichotomize)

  # Explicit cut-off of 1.
  input = [0,0,0,1,2,3,nil].to_vector
  expected = [0,0,0,0,1,1,nil].to_scale
  assert_equal(expected, input.dichotomize(1))

  # String values.
  input = %w{a a a b c d}.to_vector
  expected = [0,0,0,1,1,1].to_scale
  assert_equal(expected, input.dichotomize)
end
|
347
|
+
|
331
348
|
end
|