statsample 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +11 -0
- data/Manifest.txt +7 -0
- data/README.txt +3 -3
- data/data/repeated_fields.csv +7 -0
- data/data/tetmat_matrix.txt +5 -0
- data/data/tetmat_test.txt +1001 -0
- data/demo/spss_matrix.rb +3 -0
- data/lib/spss.rb +1 -1
- data/lib/statistics2.rb +1 -1
- data/lib/statsample.rb +30 -1
- data/lib/statsample/anova.rb +62 -66
- data/lib/statsample/bivariate.rb +273 -281
- data/lib/statsample/bivariate/tetrachoric.rb +418 -0
- data/lib/statsample/codification.rb +15 -15
- data/lib/statsample/combination.rb +108 -106
- data/lib/statsample/converter/csv18.rb +52 -52
- data/lib/statsample/converter/csv19.rb +45 -48
- data/lib/statsample/converter/spss.rb +47 -0
- data/lib/statsample/converters.rb +74 -77
- data/lib/statsample/crosstab.rb +21 -17
- data/lib/statsample/dataset.rb +595 -543
- data/lib/statsample/dominanceanalysis.rb +7 -10
- data/lib/statsample/htmlreport.rb +23 -0
- data/lib/statsample/regression/multiple/baseengine.rb +59 -59
- data/lib/statsample/regression/multiple/gslengine.rb +1 -1
- data/lib/statsample/reliability.rb +165 -145
- data/lib/statsample/vector.rb +16 -2
- data/test/test_anova.rb +16 -16
- data/test/test_bivariate.rb +146 -0
- data/test/test_csv.rb +6 -0
- data/test/test_dataset.rb +49 -5
- data/test/test_statistics.rb +6 -90
- data/test/test_vector.rb +27 -10
- metadata +10 -4
- data/test/test_r.rb +0 -9
- data/test/test_stata.rb +0 -11
data/lib/statsample/vector.rb
CHANGED
@@ -182,6 +182,22 @@ module Statsample
|
|
182
182
|
}
|
183
183
|
set_valid_data
|
184
184
|
end
|
185
|
+
# Dichotomize the vector into 0 and 1, based on the lowest value.
|
186
|
+
# If the parameter is defined, this value and lower
|
187
|
+
# will be 0 and higher, 1
|
188
|
+
def dichotomize(low=nil)
|
189
|
+
fs=factors
|
190
|
+
low||=factors.min
|
191
|
+
@data_with_nils.collect{|x|
|
192
|
+
if x.nil?
|
193
|
+
nil
|
194
|
+
elsif x>low
|
195
|
+
1
|
196
|
+
else
|
197
|
+
0
|
198
|
+
end
|
199
|
+
}.to_scale
|
200
|
+
end
|
185
201
|
# Iterate on each item.
|
186
202
|
# Equivalent to
|
187
203
|
# @data.each{|x| yield x}
|
@@ -190,7 +206,6 @@ module Statsample
|
|
190
206
|
end
|
191
207
|
|
192
208
|
# Iterate on each item, retrieving index
|
193
|
-
|
194
209
|
def each_index
|
195
210
|
(0...@data.size).each {|i|
|
196
211
|
yield(i)
|
@@ -520,7 +535,6 @@ module Statsample
|
|
520
535
|
# Retrieves unique values for data.
|
521
536
|
def factors
|
522
537
|
if @type==:scale
|
523
|
-
|
524
538
|
@scale_data.uniq.sort
|
525
539
|
else
|
526
540
|
@valid_data.uniq.sort
|
data/test/test_anova.rb
CHANGED
@@ -4,24 +4,24 @@ require 'test/unit'
|
|
4
4
|
|
5
5
|
class StatsampleAnovaTestCase < Test::Unit::TestCase
|
6
6
|
def initialize(*args)
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
7
|
+
@v1=[3,3,2,3,6].to_vector(:scale)
|
8
|
+
@v2=[7,6,5,6,7].to_vector(:scale)
|
9
|
+
@v3=[9,8,9,7,8].to_vector(:scale)
|
10
|
+
@anova=Statsample::Anova::OneWay.new([@v1,@v2,@v3])
|
11
11
|
super
|
12
12
|
end
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
13
|
+
def test_basic
|
14
|
+
assert_in_delta(72.933, @anova.sst,0.001)
|
15
|
+
assert_in_delta(14.8,@anova.sswg,0.001)
|
16
|
+
assert_in_delta(58.133,@anova.ssbg,0.001)
|
17
|
+
assert_in_delta(@anova.sst,@anova.sswg+@anova.ssbg,0.00001)
|
18
|
+
assert_equal(14,@anova.df_total)
|
19
|
+
assert_equal(12,@anova.df_wg)
|
20
|
+
assert_equal(2,@anova.df_bg)
|
21
|
+
assert_in_delta(23.568,@anova.f,0.001)
|
22
|
+
anova2=Statsample::Anova::OneWay.new([@v1,@v1,@v1,@v1,@v2])
|
23
|
+
assert_in_delta(3.960, anova2.f,0.001)
|
24
24
|
assert(@anova.significance<0.01)
|
25
25
|
assert_in_delta(0.016, anova2.significance,0.001)
|
26
|
-
|
26
|
+
end
|
27
27
|
end
|
@@ -0,0 +1,146 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
2
|
+
require 'statsample'
|
3
|
+
require 'test/unit'
|
4
|
+
class StatsampleBivariateTestCase < Test::Unit::TestCase
|
5
|
+
def test_sum_of_codeviated
|
6
|
+
v1=[1,2,3,4,5,6].to_vector(:scale)
|
7
|
+
v2=[6,2,4,10,12,8].to_vector(:scale)
|
8
|
+
assert_equal(23.0, Statsample::Bivariate.sum_of_codeviated(v1,v2))
|
9
|
+
end
|
10
|
+
def test_pearson
|
11
|
+
v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
|
12
|
+
v2=[2,3,7,8,6,4,3,2].to_vector(:scale)
|
13
|
+
assert_in_delta(0.525,Statsample::Bivariate.pearson(v1,v2), 0.001)
|
14
|
+
v3=[6,2, 1000,1000,5,4,7,8,4,3,2,nil].to_vector(:scale)
|
15
|
+
v4=[2,nil,nil,nil, 3,7,8,6,4,3,2,500].to_vector(:scale)
|
16
|
+
assert_in_delta(0.525,Statsample::Bivariate.pearson(v3,v4),0.001)
|
17
|
+
end
|
18
|
+
def test_tetrachoric_matrix
|
19
|
+
ds=Statsample::PlainText.read(File.dirname(__FILE__)+"/../data/tetmat_test.txt", %w{a b c d e})
|
20
|
+
tcm_obs=Statsample::Bivariate.tetrachoric_correlation_matrix(ds)
|
21
|
+
tcm_exp=Statsample::PlainText.read(File.dirname(__FILE__)+"/../data/tetmat_matrix.txt", %w{a b c d e}).to_matrix
|
22
|
+
tcm_obs.row_size.times {|i|
|
23
|
+
tcm_obs.column_size {|j|
|
24
|
+
assert_in_delta(tcm_obs[i,j], tcm_exp[i,k], 0.00001)
|
25
|
+
}
|
26
|
+
}
|
27
|
+
end
|
28
|
+
def test_tetrachoric
|
29
|
+
a,b,c,d=0,0,0,0
|
30
|
+
assert_raise RuntimeError do
|
31
|
+
tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
|
32
|
+
end
|
33
|
+
a,b,c,d=10,10,0,0
|
34
|
+
assert_raise RuntimeError do
|
35
|
+
tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
|
36
|
+
end
|
37
|
+
a,b,c,d=10,0,10,0
|
38
|
+
assert_raise RuntimeError do
|
39
|
+
tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
|
40
|
+
end
|
41
|
+
a,b,c,d=10,0,0,10
|
42
|
+
tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
|
43
|
+
assert_equal(1,tc.r)
|
44
|
+
assert_equal(0,tc.se)
|
45
|
+
a,b,c,d=0,10,10,0
|
46
|
+
tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
|
47
|
+
assert_equal(-1,tc.r)
|
48
|
+
assert_equal(0,tc.se)
|
49
|
+
|
50
|
+
a,b,c,d = 30,40,70,20
|
51
|
+
tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
|
52
|
+
assert_in_delta(-0.53980,tc.r,0.0001)
|
53
|
+
assert_in_delta(0.09940,tc.se,0.0001)
|
54
|
+
assert_in_delta(0.31864,tc.threshold_x,0.0001)
|
55
|
+
assert_in_delta(-0.15731,tc.threshold_y,0.0001)
|
56
|
+
x=%w{a a a a b b b a b b a a b b}.to_vector
|
57
|
+
y=%w{0 0 1 1 0 0 1 1 1 1 0 0 1 1}.to_vector
|
58
|
+
# crosstab
|
59
|
+
# 0 1
|
60
|
+
# a 4 3
|
61
|
+
# b 2 5
|
62
|
+
a,b,c,d=4,3,2,5
|
63
|
+
tc1 = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
|
64
|
+
tc2 = Statsample::Bivariate::Tetrachoric.new_with_vectors(x,y)
|
65
|
+
assert_equal(tc1.r,tc2.r)
|
66
|
+
assert_equal(tc1.se,tc2.se)
|
67
|
+
|
68
|
+
end
|
69
|
+
def test_matrix_correlation
|
70
|
+
v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
|
71
|
+
v2=[2,3,7,8,6,4,3,2].to_vector(:scale)
|
72
|
+
v3=[6,2, 1000,1000,5,4,7,8].to_vector(:scale)
|
73
|
+
v4=[2,nil,nil,nil, 3,7,8,6].to_vector(:scale)
|
74
|
+
ds={'v1'=>v1,'v2'=>v2,'v3'=>v3,'v4'=>v4}.to_dataset
|
75
|
+
c=Proc.new {|n1,n2|Statsample::Bivariate.pearson(n1,n2)}
|
76
|
+
expected=Matrix[ [c.call(v1,v1),c.call(v1,v2),c.call(v1,v3),c.call(v1,v4)], [c.call(v2,v1),c.call(v2,v2),c.call(v2,v3),c.call(v2,v4)], [c.call(v3,v1),c.call(v3,v2),c.call(v3,v3),c.call(v3,v4)],
|
77
|
+
[c.call(v4,v1),c.call(v4,v2),c.call(v4,v3),c.call(v4,v4)]
|
78
|
+
]
|
79
|
+
obt=Statsample::Bivariate.correlation_matrix(ds)
|
80
|
+
for i in 0...expected.row_size
|
81
|
+
for j in 0...expected.column_size
|
82
|
+
#puts expected[i,j].inspect
|
83
|
+
#puts obt[i,j].inspect
|
84
|
+
assert_in_delta(expected[i,j], obt[i,j],0.0001,"#{expected[i,j].class}!=#{obt[i,j].class} ")
|
85
|
+
end
|
86
|
+
end
|
87
|
+
#assert_equal(expected,obt)
|
88
|
+
end
|
89
|
+
def test_prop_pearson
|
90
|
+
assert_in_delta(0.42, Statsample::Bivariate.prop_pearson(Statsample::Bivariate.t_r(0.084,94), 94),0.01)
|
91
|
+
assert_in_delta(0.65, Statsample::Bivariate.prop_pearson(Statsample::Bivariate.t_r(0.046,95), 95),0.01)
|
92
|
+
r=0.9
|
93
|
+
n=100
|
94
|
+
t=Statsample::Bivariate.t_r(r,n)
|
95
|
+
assert(Statsample::Bivariate.prop_pearson(t,n,:both)<0.05)
|
96
|
+
assert(Statsample::Bivariate.prop_pearson(t,n,:right)<0.05)
|
97
|
+
assert(Statsample::Bivariate.prop_pearson(t,n,:left)>0.05)
|
98
|
+
|
99
|
+
r=-0.9
|
100
|
+
n=100
|
101
|
+
t=Statsample::Bivariate.t_r(r,n)
|
102
|
+
assert(Statsample::Bivariate.prop_pearson(t,n,:both)<0.05)
|
103
|
+
assert(Statsample::Bivariate.prop_pearson(t,n,:right)>0.05)
|
104
|
+
assert(Statsample::Bivariate.prop_pearson(t,n,:left)<0.05)
|
105
|
+
end
|
106
|
+
def test_covariance
|
107
|
+
if HAS_GSL
|
108
|
+
v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
|
109
|
+
v2=[2,3,7,8,6,4,3,2].to_vector(:scale)
|
110
|
+
assert_in_delta(Statsample::Bivariate.covariance(v1,v2), Statsample::Bivariate.covariance_slow(v1,v2), 0.001)
|
111
|
+
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
def test_spearman
|
116
|
+
v1=[86,97,99,100,101,103,106,110,112,113].to_vector(:scale)
|
117
|
+
v2=[0,20,28,27,50,29,7,17,6,12].to_vector(:scale)
|
118
|
+
assert_in_delta(-0.175758,Statsample::Bivariate.spearman(v1,v2),0.0001)
|
119
|
+
|
120
|
+
end
|
121
|
+
def test_point_biserial
|
122
|
+
c=[1,3,5,6,7,100,200,300,400,300].to_vector(:scale)
|
123
|
+
d=[1,1,1,1,1,0,0,0,0,0].to_vector(:scale)
|
124
|
+
assert_raise TypeError do
|
125
|
+
Statsample::Bivariate.point_biserial(c,d)
|
126
|
+
end
|
127
|
+
assert_in_delta(Statsample::Bivariate.point_biserial(d,c), Statsample::Bivariate.pearson(d,c), 0.0001)
|
128
|
+
end
|
129
|
+
def test_tau
|
130
|
+
v1=[1,2,3,4,5,6,7,8,9,10,11].to_vector(:ordinal)
|
131
|
+
v2=[1,3,4,5,7,8,2,9,10,6,11].to_vector(:ordinal)
|
132
|
+
assert_in_delta(0.6727,Statsample::Bivariate.tau_a(v1,v2),0.001)
|
133
|
+
assert_in_delta(0.6727,Statsample::Bivariate.tau_b((Statsample::Crosstab.new(v1,v2).to_matrix)),0.001)
|
134
|
+
v1=[12,14,14,17,19,19,19,19,19,20,21,21,21,21,21,22,23,24,24,24,26,26,27].to_vector(:ordinal)
|
135
|
+
v2=[11,4,4,2,0,0,0,0,0,0,4,0,4,0,0,0,0,4,0,0,0,0,0].to_vector(:ordinal)
|
136
|
+
assert_in_delta(-0.376201540231705, Statsample::Bivariate.tau_b(Statsample::Crosstab.new(v1,v2).to_matrix),0.001)
|
137
|
+
end
|
138
|
+
def test_gamma
|
139
|
+
m=Matrix[[10,5,2],[10,15,20]]
|
140
|
+
assert_in_delta(0.636,Statsample::Bivariate.gamma(m),0.001)
|
141
|
+
m2=Matrix[[15,12,6,5],[12,8,10,8],[4,6,9,10]]
|
142
|
+
assert_in_delta(0.349,Statsample::Bivariate.gamma(m2),0.001)
|
143
|
+
|
144
|
+
|
145
|
+
end
|
146
|
+
end
|
data/test/test_csv.rb
CHANGED
@@ -26,6 +26,12 @@ class StatsampleCSVTestCase < Test::Unit::TestCase
|
|
26
26
|
def test_nil
|
27
27
|
assert_equal(nil,@ds['age'][5])
|
28
28
|
end
|
29
|
+
def test_repeated
|
30
|
+
ds=Statsample::CSV.read(File.dirname(__FILE__)+"/../data/repeated_fields.csv")
|
31
|
+
assert_equal(%w{id name_1 age_1 city a1 name_2 age_2},ds.fields)
|
32
|
+
age=[3,4,5,6,nil,8].to_vector(:scale)
|
33
|
+
assert_equal(age,ds['age_2'])
|
34
|
+
end
|
29
35
|
def test_write
|
30
36
|
filename=Dir::tmpdir+"/test_write.csv"
|
31
37
|
Statsample::CSV.write(@ds,filename)
|
data/test/test_dataset.rb
CHANGED
@@ -13,10 +13,10 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
|
|
13
13
|
assert_equal(%w{id name age city a1}, @ds.fields)
|
14
14
|
end
|
15
15
|
def test_saveload
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
16
|
+
outfile=Dir::tmpdir+"/dataset.ds"
|
17
|
+
@ds.save(outfile)
|
18
|
+
a=Statsample.load(outfile)
|
19
|
+
assert_equal(@ds,a)
|
20
20
|
end
|
21
21
|
|
22
22
|
def test_matrix
|
@@ -31,6 +31,24 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
|
|
31
31
|
@ds.fields=%w{id name age}
|
32
32
|
assert_equal(%w{id name age a1 city}, @ds.fields)
|
33
33
|
end
|
34
|
+
def test_merge
|
35
|
+
a=[1,2,3].to_scale
|
36
|
+
b=[3,4,5].to_vector
|
37
|
+
c=[4,5,6].to_scale
|
38
|
+
d=[7,8,9].to_vector
|
39
|
+
e=[10,20,30].to_vector
|
40
|
+
ds1={'a'=>a,'b'=>b}.to_dataset
|
41
|
+
ds2={'c'=>c,'d'=>d}.to_dataset
|
42
|
+
exp={'a'=>a,'b'=>b,'c'=>c,'d'=>d}.to_dataset
|
43
|
+
|
44
|
+
assert_equal(exp,ds1.merge(ds2))
|
45
|
+
exp.fields=%w{c d a b}
|
46
|
+
assert_equal(exp,ds2.merge(ds1))
|
47
|
+
ds3={'a'=>e}.to_dataset
|
48
|
+
exp={'a_1'=>a,'b'=>b,'a_2'=>e}.to_dataset
|
49
|
+
exp.fields=%w{a_1 b a_2}
|
50
|
+
assert_equal(exp,ds1.merge(ds3))
|
51
|
+
end
|
34
52
|
def test_each_vector
|
35
53
|
a=[1,2,3].to_vector
|
36
54
|
b=[3,4,5].to_vector
|
@@ -185,7 +203,18 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
|
|
185
203
|
assert_equal([1,1,0,nil,1],@ds.col('a1_b').to_a)
|
186
204
|
assert_equal([0,1,0,nil,1],@ds.col('a1_c').to_a)
|
187
205
|
end
|
188
|
-
|
206
|
+
def test_percentiles
|
207
|
+
v1=(1..100).to_a.to_scale
|
208
|
+
assert_equal(50.5,v1.median)
|
209
|
+
assert_equal(25.5, v1.percentil(25))
|
210
|
+
v2=(1..99).to_a.to_scale
|
211
|
+
assert_equal(50,v2.median)
|
212
|
+
assert_equal(25,v2.percentil(25))
|
213
|
+
v3=(1..50).to_a.to_scale
|
214
|
+
assert_equal(25.5, v3.median)
|
215
|
+
assert_equal(13, v3.percentil(25))
|
216
|
+
|
217
|
+
end
|
189
218
|
def test_add_case
|
190
219
|
ds=Statsample::Dataset.new({'a'=>[].to_vector, 'b'=>[].to_vector, 'c'=>[].to_vector})
|
191
220
|
ds.add_case([1,2,3])
|
@@ -326,5 +355,20 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
|
|
326
355
|
assert_equal(vsum,ds.compute("v1+v2+v3"))
|
327
356
|
assert_equal(vmult,ds.compute("v1*v2"))
|
328
357
|
|
358
|
+
end
|
359
|
+
def test_crosstab_with_asignation
|
360
|
+
v1=%w{a a a b b b c c c}.to_vector
|
361
|
+
v2=%w{a b c a b c a b c}.to_vector
|
362
|
+
v3=%w{0 1 0 0 1 1 0 0 1}.to_scale
|
363
|
+
ds=Statsample::Dataset.crosstab_by_asignation(v1,v2,v3)
|
364
|
+
assert_equal(:nominal, ds['_id'].type)
|
365
|
+
assert_equal(:scale, ds['a'].type)
|
366
|
+
assert_equal(:scale, ds['b'].type)
|
367
|
+
ev_id=%w{a b c}.to_vector
|
368
|
+
ev_a =%w{0 0 0}.to_scale
|
369
|
+
ev_b =%w{1 1 0}.to_scale
|
370
|
+
ev_c =%w{0 1 1}.to_scale
|
371
|
+
ds2={'_id'=>ev_id, 'a'=>ev_a, 'b'=>ev_b, 'c'=>ev_c}.to_dataset
|
372
|
+
assert_equal(ds, ds2)
|
329
373
|
end
|
330
374
|
end
|
data/test/test_statistics.rb
CHANGED
@@ -6,6 +6,11 @@ class StatsampleStatisicsTestCase < Test::Unit::TestCase
|
|
6
6
|
def initialize(*args)
|
7
7
|
super
|
8
8
|
end
|
9
|
+
def test_recode_repeated
|
10
|
+
a=%w{a b c c d d d e}
|
11
|
+
exp=["a","b","c_1","c_2","d_1","d_2","d_3","e"]
|
12
|
+
assert_equal(exp,a.recode_repeated)
|
13
|
+
end
|
9
14
|
def test_is_number
|
10
15
|
assert("10".is_number?)
|
11
16
|
assert("-10".is_number?)
|
@@ -30,96 +35,7 @@ class StatsampleStatisicsTestCase < Test::Unit::TestCase
|
|
30
35
|
chi=Statsample::Test.chi_square(real,expected)
|
31
36
|
assert_in_delta(32.53,chi,0.1)
|
32
37
|
end
|
33
|
-
|
34
|
-
v1=[1,2,3,4,5,6].to_vector(:scale)
|
35
|
-
v2=[6,2,4,10,12,8].to_vector(:scale)
|
36
|
-
assert_equal(23.0, Statsample::Bivariate.sum_of_codeviated(v1,v2))
|
37
|
-
end
|
38
|
-
def test_pearson
|
39
|
-
v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
|
40
|
-
v2=[2,3,7,8,6,4,3,2].to_vector(:scale)
|
41
|
-
assert_in_delta(0.525,Statsample::Bivariate.pearson(v1,v2), 0.001)
|
42
|
-
v3=[6,2, 1000,1000,5,4,7,8,4,3,2,nil].to_vector(:scale)
|
43
|
-
v4=[2,nil,nil,nil, 3,7,8,6,4,3,2,500].to_vector(:scale)
|
44
|
-
assert_in_delta(0.525,Statsample::Bivariate.pearson(v3,v4),0.001)
|
45
|
-
end
|
46
|
-
def test_matrix_correlation
|
47
|
-
v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
|
48
|
-
v2=[2,3,7,8,6,4,3,2].to_vector(:scale)
|
49
|
-
v3=[6,2, 1000,1000,5,4,7,8].to_vector(:scale)
|
50
|
-
v4=[2,nil,nil,nil, 3,7,8,6].to_vector(:scale)
|
51
|
-
ds={'v1'=>v1,'v2'=>v2,'v3'=>v3,'v4'=>v4}.to_dataset
|
52
|
-
c=Proc.new {|n1,n2|Statsample::Bivariate.pearson(n1,n2)}
|
53
|
-
expected=Matrix[ [c.call(v1,v1),c.call(v1,v2),c.call(v1,v3),c.call(v1,v4)], [c.call(v2,v1),c.call(v2,v2),c.call(v2,v3),c.call(v2,v4)], [c.call(v3,v1),c.call(v3,v2),c.call(v3,v3),c.call(v3,v4)],
|
54
|
-
[c.call(v4,v1),c.call(v4,v2),c.call(v4,v3),c.call(v4,v4)]
|
55
|
-
]
|
56
|
-
obt=Statsample::Bivariate.correlation_matrix(ds)
|
57
|
-
for i in 0...expected.row_size
|
58
|
-
for j in 0...expected.column_size
|
59
|
-
#puts expected[i,j].inspect
|
60
|
-
#puts obt[i,j].inspect
|
61
|
-
assert_in_delta(expected[i,j], obt[i,j],0.0001,"#{expected[i,j].class}!=#{obt[i,j].class} ")
|
62
|
-
end
|
63
|
-
end
|
64
|
-
#assert_equal(expected,obt)
|
65
|
-
end
|
66
|
-
def test_prop_pearson
|
67
|
-
assert_in_delta(0.42, Statsample::Bivariate.prop_pearson(Statsample::Bivariate.t_r(0.084,94), 94),0.01)
|
68
|
-
assert_in_delta(0.65, Statsample::Bivariate.prop_pearson(Statsample::Bivariate.t_r(0.046,95), 95),0.01)
|
69
|
-
r=0.9
|
70
|
-
n=100
|
71
|
-
t=Statsample::Bivariate.t_r(r,n)
|
72
|
-
assert(Statsample::Bivariate.prop_pearson(t,n,:both)<0.05)
|
73
|
-
assert(Statsample::Bivariate.prop_pearson(t,n,:right)<0.05)
|
74
|
-
assert(Statsample::Bivariate.prop_pearson(t,n,:left)>0.05)
|
75
|
-
|
76
|
-
r=-0.9
|
77
|
-
n=100
|
78
|
-
t=Statsample::Bivariate.t_r(r,n)
|
79
|
-
assert(Statsample::Bivariate.prop_pearson(t,n,:both)<0.05)
|
80
|
-
assert(Statsample::Bivariate.prop_pearson(t,n,:right)>0.05)
|
81
|
-
assert(Statsample::Bivariate.prop_pearson(t,n,:left)<0.05)
|
82
|
-
end
|
83
|
-
def test_covariance
|
84
|
-
if HAS_GSL
|
85
|
-
v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
|
86
|
-
v2=[2,3,7,8,6,4,3,2].to_vector(:scale)
|
87
|
-
assert_in_delta(Statsample::Bivariate.covariance(v1,v2), Statsample::Bivariate.covariance_slow(v1,v2), 0.001)
|
88
|
-
|
89
|
-
end
|
90
|
-
end
|
91
|
-
|
92
|
-
def test_spearman
|
93
|
-
v1=[86,97,99,100,101,103,106,110,112,113].to_vector(:scale)
|
94
|
-
v2=[0,20,28,27,50,29,7,17,6,12].to_vector(:scale)
|
95
|
-
assert_in_delta(-0.175758,Statsample::Bivariate.spearman(v1,v2),0.0001)
|
96
|
-
|
97
|
-
end
|
98
|
-
def test_point_biserial
|
99
|
-
c=[1,3,5,6,7,100,200,300,400,300].to_vector(:scale)
|
100
|
-
d=[1,1,1,1,1,0,0,0,0,0].to_vector(:scale)
|
101
|
-
assert_raise TypeError do
|
102
|
-
Statsample::Bivariate.point_biserial(c,d)
|
103
|
-
end
|
104
|
-
assert_in_delta(Statsample::Bivariate.point_biserial(d,c), Statsample::Bivariate.pearson(d,c), 0.0001)
|
105
|
-
end
|
106
|
-
def test_tau
|
107
|
-
v1=[1,2,3,4,5,6,7,8,9,10,11].to_vector(:ordinal)
|
108
|
-
v2=[1,3,4,5,7,8,2,9,10,6,11].to_vector(:ordinal)
|
109
|
-
assert_in_delta(0.6727,Statsample::Bivariate.tau_a(v1,v2),0.001)
|
110
|
-
assert_in_delta(0.6727,Statsample::Bivariate.tau_b((Statsample::Crosstab.new(v1,v2).to_matrix)),0.001)
|
111
|
-
v1=[12,14,14,17,19,19,19,19,19,20,21,21,21,21,21,22,23,24,24,24,26,26,27].to_vector(:ordinal)
|
112
|
-
v2=[11,4,4,2,0,0,0,0,0,0,4,0,4,0,0,0,0,4,0,0,0,0,0].to_vector(:ordinal)
|
113
|
-
assert_in_delta(-0.376201540231705, Statsample::Bivariate.tau_b(Statsample::Crosstab.new(v1,v2).to_matrix),0.001)
|
114
|
-
end
|
115
|
-
def test_gamma
|
116
|
-
m=Matrix[[10,5,2],[10,15,20]]
|
117
|
-
assert_in_delta(0.636,Statsample::Bivariate.gamma(m),0.001)
|
118
|
-
m2=Matrix[[15,12,6,5],[12,8,10,8],[4,6,9,10]]
|
119
|
-
assert_in_delta(0.349,Statsample::Bivariate.gamma(m2),0.001)
|
120
|
-
|
121
|
-
|
122
|
-
end
|
38
|
+
|
123
39
|
def test_estimation_mean
|
124
40
|
v=([42]*23+[41]*4+[36]*1+[32]*1+[29]*1+[27]*2+[23]*1+[19]*1+[16]*2+[15]*2+[14,11,10,9,7]+ [6]*3+[5]*2+[4,3]).to_vector(:scale)
|
125
41
|
assert_equal(50,v.size)
|
data/test/test_vector.rb
CHANGED
@@ -2,7 +2,9 @@ $:.unshift(File.dirname(__FILE__)+'/../lib/')
|
|
2
2
|
require 'statsample'
|
3
3
|
require 'test/unit'
|
4
4
|
require 'tmpdir'
|
5
|
-
class
|
5
|
+
class TestStatsample
|
6
|
+
end
|
7
|
+
class TestStatsample::TestVector < Test::Unit::TestCase
|
6
8
|
|
7
9
|
def setup
|
8
10
|
@c = Statsample::Vector.new([5,5,5,5,5,6,6,7,8,9,10,1,2,3,4,nil,-99,-99], :nominal)
|
@@ -121,15 +123,15 @@ class StatsampleVectorTestCase < Test::Unit::TestCase
|
|
121
123
|
end
|
122
124
|
end
|
123
125
|
def test_nominal
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
126
|
+
assert_equal(@c[1],5)
|
127
|
+
assert_equal({ 1=>1,2=>1,3=>1,4=>1,5=>5,6=>2,7=>1,8=>1, 9=>1,10=>1},@c.frequencies)
|
128
|
+
assert_equal({ 1=>1,2=>1,3=>1,4=>1,5=>5,6=>2,7=>1,8=>1, 9=>1,10=>1},@c._frequencies)
|
129
|
+
assert_equal({ 1 => 1.quo(15) ,2=>1.quo(15), 3=>1.quo(15),4=>1.quo(15),5=>5.quo(15),6=>2.quo(15),7=>1.quo(15), 8=>1.quo(15), 9=>1.quo(15),10=>1.quo(15)}, @c.proportions)
|
130
|
+
assert_equal(@c.proportion, 1.quo(15))
|
131
|
+
assert_equal(@c.proportion(2), 1.quo(15))
|
132
|
+
assert_equal([1,2,3,4,5,6,7,8,9,10], @c.factors.sort)
|
133
|
+
assert_equal(@c.mode,5)
|
134
|
+
assert_equal(@c.n_valid,15)
|
133
135
|
end
|
134
136
|
def test_equality
|
135
137
|
v1=[1,2,3].to_vector
|
@@ -328,4 +330,19 @@ class StatsampleVectorTestCase < Test::Unit::TestCase
|
|
328
330
|
expected=[2,2,2,4.5,4.5,6,7.5,7.5,10,10,10].to_vector(:ordinal)
|
329
331
|
assert_equal(expected,a.ranked)
|
330
332
|
end
|
333
|
+
def test_dichotomize
|
334
|
+
a= [0,0,0,1,2,3,nil].to_vector
|
335
|
+
exp=[0,0,0,1,1,1,nil].to_scale
|
336
|
+
assert_equal(exp,a.dichotomize)
|
337
|
+
a= [1,1,1,2,2,2,3].to_vector
|
338
|
+
exp=[0,0,0,1,1,1,1].to_scale
|
339
|
+
assert_equal(exp,a.dichotomize)
|
340
|
+
a= [0,0,0,1,2,3,nil].to_vector
|
341
|
+
exp=[0,0,0,0,1,1,nil].to_scale
|
342
|
+
assert_equal(exp,a.dichotomize(1))
|
343
|
+
a= %w{a a a b c d}.to_vector
|
344
|
+
exp=[0,0,0,1,1,1].to_scale
|
345
|
+
assert_equal(exp, a.dichotomize)
|
346
|
+
end
|
347
|
+
|
331
348
|
end
|