statsample 1.4.1 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +4 -3
- data/History.txt +4 -0
- data/README.md +4 -0
- data/lib/statsample/converter/csv.rb +41 -54
- data/lib/statsample/converters.rb +18 -19
- data/lib/statsample/version.rb +1 -1
- data/test/fixtures/scientific_notation.csv +4 -0
- data/test/helpers_tests.rb +37 -38
- data/test/test_analysis.rb +96 -97
- data/test/test_anova_contrast.rb +22 -22
- data/test/test_anovaoneway.rb +12 -12
- data/test/test_anovatwoway.rb +16 -17
- data/test/test_anovatwowaywithdataset.rb +22 -24
- data/test/test_anovawithvectors.rb +67 -69
- data/test/test_awesome_print_bug.rb +9 -9
- data/test/test_bartlettsphericity.rb +13 -13
- data/test/test_bivariate.rb +122 -126
- data/test/test_codification.rb +51 -49
- data/test/test_crosstab.rb +44 -40
- data/test/test_csv.rb +52 -70
- data/test/test_dataset.rb +347 -330
- data/test/test_dominance_analysis.rb +22 -24
- data/test/test_factor.rb +163 -166
- data/test/test_factor_map.rb +25 -30
- data/test/test_factor_pa.rb +28 -28
- data/test/test_ggobi.rb +19 -18
- data/test/test_gsl.rb +13 -15
- data/test/test_histogram.rb +74 -77
- data/test/test_matrix.rb +29 -31
- data/test/test_multiset.rb +132 -126
- data/test/test_regression.rb +143 -149
- data/test/test_reliability.rb +149 -155
- data/test/test_reliability_icc.rb +100 -104
- data/test/test_reliability_skillscale.rb +38 -40
- data/test/test_resample.rb +14 -12
- data/test/test_rserve_extension.rb +33 -33
- data/test/test_srs.rb +5 -5
- data/test/test_statistics.rb +52 -50
- data/test/test_stest.rb +27 -28
- data/test/test_stratified.rb +10 -10
- data/test/test_test_f.rb +17 -17
- data/test/test_test_kolmogorovsmirnov.rb +21 -21
- data/test/test_test_t.rb +52 -52
- data/test/test_umannwhitney.rb +16 -16
- data/test/test_vector.rb +419 -410
- data/test/test_wilcoxonsignedrank.rb +60 -63
- data/test/test_xls.rb +41 -41
- metadata +55 -5
- data/web/Rakefile +0 -39
data/test/test_umannwhitney.rb
CHANGED
@@ -1,27 +1,27 @@
|
|
1
|
-
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
|
1
|
+
require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
2
2
|
|
3
|
-
class StatsampleUMannWhitneyTestCase <
|
3
|
+
class StatsampleUMannWhitneyTestCase < Minitest::Test
|
4
4
|
include Statsample::Test
|
5
5
|
context Statsample::Test::UMannWhitney do
|
6
6
|
setup do
|
7
|
-
@v1=[1,2,3,4,7,8,9,10,14,15].to_scale
|
8
|
-
@v2=[5,6,11,12,13,16,17,18,19].to_scale
|
9
|
-
@u=Statsample::Test::UMannWhitney.new(@v1
|
7
|
+
@v1 = [1, 2, 3, 4, 7, 8, 9, 10, 14, 15].to_scale
|
8
|
+
@v2 = [5, 6, 11, 12, 13, 16, 17, 18, 19].to_scale
|
9
|
+
@u = Statsample::Test::UMannWhitney.new(@v1, @v2)
|
10
10
|
end
|
11
|
-
should
|
12
|
-
assert_equal(Statsample::Test.u_mannwhitney(@v1
|
11
|
+
should 'have same result using class or Test#u_mannwhitney' do
|
12
|
+
assert_equal(Statsample::Test.u_mannwhitney(@v1, @v2).u, @u.u)
|
13
13
|
end
|
14
|
-
should
|
15
|
-
assert_equal(73
|
16
|
-
assert_equal(117
|
17
|
-
assert_equal(18
|
14
|
+
should 'have correct U values' do
|
15
|
+
assert_equal(73, @u.r1)
|
16
|
+
assert_equal(117, @u.r2)
|
17
|
+
assert_equal(18, @u.u)
|
18
18
|
end
|
19
|
-
should
|
20
|
-
assert_in_delta(-2.205
|
19
|
+
should 'have correct value for z' do
|
20
|
+
assert_in_delta(-2.205, @u.z, 0.001)
|
21
21
|
end
|
22
|
-
should
|
23
|
-
assert_in_delta(0.027
|
24
|
-
assert_in_delta(0.028
|
22
|
+
should 'have correct value for z and exact probability' do
|
23
|
+
assert_in_delta(0.027, @u.probability_z, 0.001)
|
24
|
+
assert_in_delta(0.028, @u.probability_exact, 0.001)
|
25
25
|
end
|
26
26
|
end
|
27
27
|
end
|
data/test/test_vector.rb
CHANGED
@@ -1,383 +1,376 @@
|
|
1
|
-
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
|
1
|
+
require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
2
2
|
|
3
|
-
class StatsampleTestVector <
|
3
|
+
class StatsampleTestVector < Minitest::Test
|
4
4
|
include Statsample::Shorthand
|
5
5
|
|
6
6
|
def setup
|
7
|
-
@c = Statsample::Vector.new([5,5,5,5,5,6,6,7,8,9,10,1,2,3,4,nil
|
8
|
-
@c.name=
|
9
|
-
@c.missing_values=[-99]
|
7
|
+
@c = Statsample::Vector.new([5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99], :nominal)
|
8
|
+
@c.name = 'Test Vector'
|
9
|
+
@c.missing_values = [-99]
|
10
10
|
end
|
11
|
+
|
11
12
|
def assert_counting_tokens(b)
|
12
|
-
assert_equal([1,1,0,1,0,nil],b['a'].to_a)
|
13
|
-
assert_equal([0,1,0,0,0,nil],b['b'].to_a)
|
14
|
-
assert_equal([0,0,1,0,0,nil],b['c'].to_a)
|
15
|
-
assert_equal([0,0,1,1,0,nil],b['d'].to_a)
|
16
|
-
assert_equal([0,0,0,0,1,nil],b[10].to_a)
|
13
|
+
assert_equal([1, 1, 0, 1, 0, nil], b['a'].to_a)
|
14
|
+
assert_equal([0, 1, 0, 0, 0, nil], b['b'].to_a)
|
15
|
+
assert_equal([0, 0, 1, 0, 0, nil], b['c'].to_a)
|
16
|
+
assert_equal([0, 0, 1, 1, 0, nil], b['d'].to_a)
|
17
|
+
assert_equal([0, 0, 0, 0, 1, nil], b[10].to_a)
|
17
18
|
end
|
18
19
|
context Statsample do
|
19
20
|
setup do
|
20
|
-
@sample=100
|
21
|
-
@a
|
22
|
-
@b
|
23
|
-
@correct_a=
|
24
|
-
@correct_b=
|
25
|
-
@a.each_with_index do |
|
21
|
+
@sample = 100
|
22
|
+
@a = @sample.times.map { |i| (i + rand(10)) % 10 == 0 ? nil : rand(100) }.to_scale
|
23
|
+
@b = @sample.times.map { |i| (i + rand(10)) % 10 == 0 ? nil : rand(100) }.to_scale
|
24
|
+
@correct_a = []
|
25
|
+
@correct_b = []
|
26
|
+
@a.each_with_index do |_v, i|
|
26
27
|
if !@a[i].nil? and !@b[i].nil?
|
27
28
|
@correct_a.push(@a[i])
|
28
29
|
@correct_b.push(@b[i])
|
29
30
|
end
|
30
31
|
end
|
31
|
-
@correct_a
|
32
|
-
@correct_b
|
33
|
-
|
34
|
-
@common=lambda do |av,bv|
|
35
|
-
assert_equal(@correct_a, av,
|
36
|
-
assert_equal(@correct_b, bv,
|
37
|
-
assert(!av.has_missing_data?,
|
38
|
-
assert(!bv.has_missing_data?,
|
32
|
+
@correct_a = @correct_a.to_scale
|
33
|
+
@correct_b = @correct_b.to_scale
|
34
|
+
|
35
|
+
@common = lambda do |av, bv|
|
36
|
+
assert_equal(@correct_a, av, 'A no es esperado')
|
37
|
+
assert_equal(@correct_b, bv, 'B no es esperado')
|
38
|
+
assert(!av.has_missing_data?, 'A tiene datos faltantes')
|
39
|
+
assert(!bv.has_missing_data?, 'b tiene datos faltantes')
|
39
40
|
end
|
40
41
|
end
|
41
|
-
should
|
42
|
-
av,bv=Statsample.only_valid @a
|
43
|
-
av2,bv2=Statsample.only_valid av,bv
|
44
|
-
@common.call(av,bv)
|
45
|
-
assert_equal(av,av2)
|
46
|
-
assert_not_same(av,av2)
|
47
|
-
assert_not_same(bv,bv2)
|
48
|
-
end
|
49
|
-
should
|
50
|
-
av,bv=Statsample.only_valid_clone @a
|
51
|
-
@common.call(av,bv)
|
52
|
-
av2,bv2=Statsample.only_valid_clone av,bv
|
53
|
-
assert_equal(av,av2)
|
54
|
-
assert_same(av,av2)
|
55
|
-
assert_same(bv,bv2)
|
42
|
+
should 'return correct only_valid' do
|
43
|
+
av, bv = Statsample.only_valid @a, @b
|
44
|
+
av2, bv2 = Statsample.only_valid av, bv
|
45
|
+
@common.call(av, bv)
|
46
|
+
assert_equal(av, av2)
|
47
|
+
assert_not_same(av, av2)
|
48
|
+
assert_not_same(bv, bv2)
|
49
|
+
end
|
50
|
+
should 'return correct only_valid_clone' do
|
51
|
+
av, bv = Statsample.only_valid_clone @a, @b
|
52
|
+
@common.call(av, bv)
|
53
|
+
av2, bv2 = Statsample.only_valid_clone av, bv
|
54
|
+
assert_equal(av, av2)
|
55
|
+
assert_same(av, av2)
|
56
|
+
assert_same(bv, bv2)
|
56
57
|
end
|
57
58
|
end
|
58
59
|
context Statsample::Vector do
|
59
60
|
setup do
|
60
|
-
@c = Statsample::Vector.new([5,5,5,5,5,6,6,7,8,9,10,1,2,3,4,nil
|
61
|
-
@c.name=
|
62
|
-
@c.missing_values=[-99]
|
63
|
-
end
|
64
|
-
should_with_gsl
|
65
|
-
gsl=GSL::Vector[1,2,3,4,5]
|
66
|
-
v=Statsample::Vector.new(gsl)
|
67
|
-
assert_equal([1,2,3,4,5], v.to_a)
|
61
|
+
@c = Statsample::Vector.new([5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99], :nominal)
|
62
|
+
@c.name = 'Test Vector'
|
63
|
+
@c.missing_values = [-99]
|
64
|
+
end
|
65
|
+
should_with_gsl 'be created with GSL::Vector' do
|
66
|
+
gsl = GSL::Vector[1, 2, 3, 4, 5]
|
67
|
+
v = Statsample::Vector.new(gsl)
|
68
|
+
assert_equal([1, 2, 3, 4, 5], v.to_a)
|
68
69
|
refute(v.flawed?)
|
69
|
-
|
70
70
|
end
|
71
71
|
|
72
|
-
context
|
72
|
+
context 'using matrix operations' do
|
73
73
|
setup do
|
74
|
-
@a=[1,2,3,4,5].to_scale
|
74
|
+
@a = [1, 2, 3, 4, 5].to_scale
|
75
75
|
end
|
76
|
-
should
|
77
|
-
mh=Matrix[[1,2,3,4,5]]
|
78
|
-
assert_equal(mh
|
76
|
+
should 'to_matrix returns a matrix with 1 row' do
|
77
|
+
mh = Matrix[[1, 2, 3, 4, 5]]
|
78
|
+
assert_equal(mh, @a.to_matrix)
|
79
79
|
end
|
80
|
-
should
|
81
|
-
mv=Matrix.columns([[1,2,3,4,5]])
|
82
|
-
assert_equal(mv
|
80
|
+
should 'to_matrix(:vertical) returns a matrix with 1 column' do
|
81
|
+
mv = Matrix.columns([[1, 2, 3, 4, 5]])
|
82
|
+
assert_equal(mv, @a.to_matrix(:vertical))
|
83
83
|
end
|
84
|
-
should
|
84
|
+
should 'returns valid submatrixes' do
|
85
85
|
# 3*4 + 2*5 = 22
|
86
|
-
a=[3,2].to_vector(:scale)
|
87
|
-
b=[4,5].to_vector(:scale)
|
88
|
-
assert_equal(22,(a.to_matrix*b.to_matrix(:vertical))[0,0])
|
86
|
+
a = [3, 2].to_vector(:scale)
|
87
|
+
b = [4, 5].to_vector(:scale)
|
88
|
+
assert_equal(22, (a.to_matrix * b.to_matrix(:vertical))[0, 0])
|
89
89
|
end
|
90
90
|
end
|
91
|
-
context
|
91
|
+
context 'when initializing' do
|
92
92
|
setup do
|
93
|
-
@data=(10.times.map{rand(100)})+[nil]
|
94
|
-
@original=Statsample::Vector.new(@data, :scale)
|
93
|
+
@data = (10.times.map { rand(100) }) + [nil]
|
94
|
+
@original = Statsample::Vector.new(@data, :scale)
|
95
95
|
end
|
96
|
-
should
|
97
|
-
second=Statsample::Vector[*@data]
|
96
|
+
should 'be the sample using []' do
|
97
|
+
second = Statsample::Vector[*@data]
|
98
98
|
assert_equal(@original, second)
|
99
99
|
end
|
100
|
-
should
|
101
|
-
reference=[0,4,5,6,10].to_scale
|
102
|
-
assert_equal(reference, Statsample::Vector[0,4,5,6,10])
|
103
|
-
assert_equal(reference, Statsample::Vector[0,4..6,10])
|
104
|
-
assert_equal(reference, Statsample::Vector[[0],[4,5,6],[10]])
|
105
|
-
assert_equal(reference, Statsample::Vector[[0],[4,[5,[6]]],[10]])
|
106
|
-
|
107
|
-
assert_equal(reference, Statsample::Vector[[0],[4,5,6].to_vector,[10]])
|
108
|
-
|
100
|
+
should '[] returns same results as R-c()' do
|
101
|
+
reference = [0, 4, 5, 6, 10].to_scale
|
102
|
+
assert_equal(reference, Statsample::Vector[0, 4, 5, 6, 10])
|
103
|
+
assert_equal(reference, Statsample::Vector[0, 4..6, 10])
|
104
|
+
assert_equal(reference, Statsample::Vector[[0], [4, 5, 6], [10]])
|
105
|
+
assert_equal(reference, Statsample::Vector[[0], [4, [5, [6]]], [10]])
|
106
|
+
|
107
|
+
assert_equal(reference, Statsample::Vector[[0], [4, 5, 6].to_vector, [10]])
|
109
108
|
end
|
110
|
-
should
|
111
|
-
lazy1
|
112
|
-
assert_equal(@original,lazy1)
|
109
|
+
should 'be the same usign #to_vector' do
|
110
|
+
lazy1 = @data.to_vector(:scale)
|
111
|
+
assert_equal(@original, lazy1)
|
113
112
|
end
|
114
|
-
should
|
115
|
-
lazy2
|
116
|
-
assert_equal(@original,lazy2)
|
117
|
-
assert_equal(:scale,lazy2.type)
|
118
|
-
assert_equal(@data.find_all{|v| !v.nil?},lazy2.valid_data)
|
113
|
+
should 'be the same using #to_scale' do
|
114
|
+
lazy2 = @data.to_scale
|
115
|
+
assert_equal(@original, lazy2)
|
116
|
+
assert_equal(:scale, lazy2.type)
|
117
|
+
assert_equal(@data.find_all { |v| !v.nil? }, lazy2.valid_data)
|
119
118
|
end
|
120
|
-
should
|
121
|
-
v1=10.times.map {nil}.to_scale
|
122
|
-
v2=Statsample::Vector.new_scale(10)
|
123
|
-
assert_equal(v1,v2)
|
124
|
-
|
119
|
+
should 'could use new_scale with size only' do
|
120
|
+
v1 = 10.times.map { nil }.to_scale
|
121
|
+
v2 = Statsample::Vector.new_scale(10)
|
122
|
+
assert_equal(v1, v2)
|
125
123
|
end
|
126
|
-
should
|
127
|
-
a=rand
|
128
|
-
v1=10.times.map {a}.to_scale
|
129
|
-
v2=Statsample::Vector.new_scale(10,a)
|
130
|
-
assert_equal(v1,v2)
|
124
|
+
should 'could use new_scale with size and value' do
|
125
|
+
a = rand
|
126
|
+
v1 = 10.times.map { a }.to_scale
|
127
|
+
v2 = Statsample::Vector.new_scale(10, a)
|
128
|
+
assert_equal(v1, v2)
|
131
129
|
end
|
132
|
-
should
|
133
|
-
v1=10.times.map {|i| i*2}.to_scale
|
134
|
-
v2=Statsample::Vector.new_scale(10) {|i| i*2}
|
135
|
-
assert_equal(v1,v2)
|
130
|
+
should 'could use new_scale with func' do
|
131
|
+
v1 = 10.times.map { |i| i * 2 }.to_scale
|
132
|
+
v2 = Statsample::Vector.new_scale(10) { |i| i * 2 }
|
133
|
+
assert_equal(v1, v2)
|
136
134
|
end
|
137
|
-
|
138
135
|
end
|
139
136
|
|
140
|
-
context
|
141
|
-
|
137
|
+
context '#split_by_separator' do
|
142
138
|
setup do
|
143
|
-
@a = Statsample::Vector.new([
|
144
|
-
@b
|
139
|
+
@a = Statsample::Vector.new(['a', 'a,b', 'c,d', 'a,d', 10, nil], :nominal)
|
140
|
+
@b = @a.split_by_separator(',')
|
145
141
|
end
|
146
|
-
should
|
142
|
+
should 'returns a Hash' do
|
147
143
|
assert_kind_of(Hash, @b)
|
148
144
|
end
|
149
|
-
should
|
150
|
-
expected=['a','b','c','d',10]
|
145
|
+
should 'return a Hash with keys with different values of @a' do
|
146
|
+
expected = ['a', 'b', 'c', 'd', 10]
|
151
147
|
assert_equal(expected, @b.keys)
|
152
148
|
end
|
153
149
|
|
154
|
-
should
|
155
|
-
@b.each_key {|k| assert_instance_of(Statsample::Vector, @b[k])}
|
150
|
+
should 'returns a Hash, which values are Statsample::Vector' do
|
151
|
+
@b.each_key { |k| assert_instance_of(Statsample::Vector, @b[k]) }
|
156
152
|
end
|
157
|
-
should
|
153
|
+
should 'hash values are n times the tokens appears' do
|
158
154
|
assert_counting_tokens(@b)
|
159
155
|
end
|
160
|
-
should
|
161
|
-
assert_equal({'a'=>3,'b'=>1,'c'=>1,'d'=>2,10=>1}, @a.split_by_separator_freq
|
156
|
+
should '#split_by_separator_freq returns the number of ocurrences of tokens' do
|
157
|
+
assert_equal({ 'a' => 3, 'b' => 1, 'c' => 1, 'd' => 2, 10 => 1 }, @a.split_by_separator_freq)
|
162
158
|
end
|
163
|
-
should
|
164
|
-
a = Statsample::Vector.new([
|
165
|
-
b=a.split_by_separator(
|
159
|
+
should 'using a different separator give the same values' do
|
160
|
+
a = Statsample::Vector.new(['a', 'a*b', 'c*d', 'a*d', 10, nil], :nominal)
|
161
|
+
b = a.split_by_separator('*')
|
166
162
|
assert_counting_tokens(b)
|
167
163
|
end
|
168
164
|
end
|
169
|
-
should
|
170
|
-
a=[1, 1, 2, 2, 4, 6, 9].to_scale
|
165
|
+
should 'return correct median_absolute_deviation' do
|
166
|
+
a = [1, 1, 2, 2, 4, 6, 9].to_scale
|
171
167
|
assert_equal(1, a.median_absolute_deviation)
|
172
168
|
end
|
173
|
-
should
|
174
|
-
a=10.times.map {|v| v}.to_scale
|
175
|
-
hist=a.histogram(2)
|
176
|
-
assert_equal([5,5], hist.bin)
|
169
|
+
should 'return correct histogram' do
|
170
|
+
a = 10.times.map { |v| v }.to_scale
|
171
|
+
hist = a.histogram(2)
|
172
|
+
assert_equal([5, 5], hist.bin)
|
177
173
|
3.times do |i|
|
178
|
-
assert_in_delta(i*4.5, hist.get_range(i)[0], 1e-9)
|
174
|
+
assert_in_delta(i * 4.5, hist.get_range(i)[0], 1e-9)
|
179
175
|
end
|
180
|
-
|
181
176
|
end
|
182
|
-
should
|
183
|
-
@c.name==
|
177
|
+
should 'have a name' do
|
178
|
+
@c.name == 'Test Vector'
|
184
179
|
end
|
185
|
-
should
|
186
|
-
a=10.times.map{rand(100)}.to_scale
|
187
|
-
b=10.times.map{rand(100)}.to_scale
|
180
|
+
should 'without explicit name, returns vector with succesive numbers' do
|
181
|
+
a = 10.times.map { rand(100) }.to_scale
|
182
|
+
b = 10.times.map { rand(100) }.to_scale
|
188
183
|
assert_match(/Vector \d+/, a.name)
|
189
|
-
a.name
|
190
|
-
next_number
|
191
|
-
assert_equal("Vector #{next_number}",b.name)
|
184
|
+
a.name =~ /Vector (\d+)/
|
185
|
+
next_number = Regexp.last_match(1).to_i + 1
|
186
|
+
assert_equal("Vector #{next_number}", b.name)
|
192
187
|
end
|
193
|
-
should
|
194
|
-
outfile=Tempfile.new(
|
188
|
+
should 'save to a file and load the same Vector' do
|
189
|
+
outfile = Tempfile.new('vector.vec')
|
195
190
|
@c.save(outfile.path)
|
196
|
-
a=Statsample.load(outfile.path)
|
197
|
-
assert_equal(@c,a)
|
198
|
-
end
|
199
|
-
should
|
200
|
-
val
|
201
|
-
assert_equal(val,[5,5,5,5,5,6,6,7,8,9,10,1,2,3,4,nil
|
202
|
-
end
|
203
|
-
|
204
|
-
should
|
205
|
-
a
|
206
|
-
exp=[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1].to_vector
|
207
|
-
assert_equal(exp,a)
|
208
|
-
exp.recode!{|v| v==0 ? 1:0}
|
209
|
-
exp2=(([1]*15)+([0]*3)).to_vector
|
210
|
-
assert_equal(exp2,exp)
|
211
|
-
end
|
212
|
-
should
|
213
|
-
a=[1,2,3,4,5].to_vector(:scale)
|
214
|
-
assert_equal(120,a.product)
|
215
|
-
end
|
216
|
-
|
217
|
-
should
|
218
|
-
@c.missing_values=[10]
|
219
|
-
assert_equal([-99
|
220
|
-
assert_equal([5,5,5,5,5,6,6,7,8,9,nil,1,2,3,4,nil
|
221
|
-
@c.missing_values=[-99]
|
222
|
-
assert_equal(@c.valid_data.sort,[1,2,3,4,5,5,5,5,5,6,6,7,8,9,10])
|
223
|
-
assert_equal(@c.data_with_nils,[5,5,5,5,5,6,6,7,8,9,10,1,2,3,4,nil,nil,nil])
|
224
|
-
@c.missing_values=[]
|
225
|
-
assert_equal(@c.valid_data.sort,[-99
|
226
|
-
assert_equal(@c.data_with_nils,[5,5,5,5,5,6,6,7,8,9,10,1,2,3,4,nil
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
a=[1,2,3,nil].to_vector
|
191
|
+
a = Statsample.load(outfile.path)
|
192
|
+
assert_equal(@c, a)
|
193
|
+
end
|
194
|
+
should '#collect returns an array' do
|
195
|
+
val = @c.collect { |v| v }
|
196
|
+
assert_equal(val, [5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99])
|
197
|
+
end
|
198
|
+
|
199
|
+
should '#recode returns a recoded array' do
|
200
|
+
a = @c.recode { |v| @c.is_valid?(v) ? 0 : 1 }
|
201
|
+
exp = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1].to_vector
|
202
|
+
assert_equal(exp, a)
|
203
|
+
exp.recode! { |v| v == 0 ? 1 : 0 }
|
204
|
+
exp2 = (([1] * 15) + ([0] * 3)).to_vector
|
205
|
+
assert_equal(exp2, exp)
|
206
|
+
end
|
207
|
+
should '#product returns the * of all values' do
|
208
|
+
a = [1, 2, 3, 4, 5].to_vector(:scale)
|
209
|
+
assert_equal(120, a.product)
|
210
|
+
end
|
211
|
+
|
212
|
+
should 'missing values' do
|
213
|
+
@c.missing_values = [10]
|
214
|
+
assert_equal([-99, -99, 1, 2, 3, 4, 5, 5, 5, 5, 5, 6, 6, 7, 8, 9], @c.valid_data.sort)
|
215
|
+
assert_equal([5, 5, 5, 5, 5, 6, 6, 7, 8, 9, nil, 1, 2, 3, 4, nil, -99, -99], @c.data_with_nils)
|
216
|
+
@c.missing_values = [-99]
|
217
|
+
assert_equal(@c.valid_data.sort, [1, 2, 3, 4, 5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10])
|
218
|
+
assert_equal(@c.data_with_nils, [5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, nil, nil])
|
219
|
+
@c.missing_values = []
|
220
|
+
assert_equal(@c.valid_data.sort, [-99, -99, 1, 2, 3, 4, 5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10])
|
221
|
+
assert_equal(@c.data_with_nils, [5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99])
|
222
|
+
end
|
223
|
+
should 'correct has_missing_data? with missing data' do
|
224
|
+
a = [1, 2, 3, nil].to_vector
|
231
225
|
assert(a.has_missing_data?)
|
232
226
|
end
|
233
|
-
should
|
234
|
-
a=[1,2,3,4,10].to_vector
|
227
|
+
should 'correct has_missing_data? without missing data' do
|
228
|
+
a = [1, 2, 3, 4, 10].to_vector
|
235
229
|
assert(!a.has_missing_data?)
|
236
230
|
end
|
237
|
-
should
|
238
|
-
a=[1,2,3,4,10].to_vector
|
239
|
-
a.missing_values=[10]
|
231
|
+
should 'with explicit missing_values, should respond has_missing_data?' do
|
232
|
+
a = [1, 2, 3, 4, 10].to_vector
|
233
|
+
a.missing_values = [10]
|
240
234
|
assert(a.has_missing_data?)
|
241
235
|
end
|
242
|
-
should
|
243
|
-
@c.labels={5=>'FIVE'}
|
244
|
-
assert_equal([
|
236
|
+
should 'label correctly fields' do
|
237
|
+
@c.labels = { 5 => 'FIVE' }
|
238
|
+
assert_equal(['FIVE', 'FIVE', 'FIVE', 'FIVE', 'FIVE', 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99], @c.vector_labeled.to_a)
|
245
239
|
end
|
246
|
-
should
|
247
|
-
h
|
248
|
-
e={15=>nil,16
|
249
|
-
assert_equal(e,h)
|
240
|
+
should 'verify' do
|
241
|
+
h = @c.verify { |d| !d.nil? and d > 0 }
|
242
|
+
e = { 15 => nil, 16 => -99, 17 => -99 }
|
243
|
+
assert_equal(e, h)
|
250
244
|
end
|
251
|
-
should
|
245
|
+
should 'have a summary with name on it' do
|
252
246
|
assert_match(/#{@c.name}/, @c.summary)
|
253
247
|
end
|
254
248
|
|
255
|
-
should
|
249
|
+
should 'GSL::Vector based should push correcty' do
|
256
250
|
if Statsample.has_gsl?
|
257
|
-
v=GSL::Vector[1,2,3,4,5].to_scale
|
251
|
+
v = GSL::Vector[1, 2, 3, 4, 5].to_scale
|
258
252
|
v.push(nil)
|
259
|
-
assert_equal([1,2,3,4,5,nil], v.to_a)
|
253
|
+
assert_equal([1, 2, 3, 4, 5, nil], v.to_a)
|
260
254
|
assert(v.flawed?)
|
261
255
|
else
|
262
|
-
skip(
|
256
|
+
skip('Requires GSL')
|
263
257
|
end
|
264
258
|
end
|
265
259
|
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
assert_equal([%w{a},%w{a b},%w{c d},%w{a d},%w{d},[10],nil], a.splitted)
|
260
|
+
should 'split correctly' do
|
261
|
+
a = Statsample::Vector.new(['a', 'a,b', 'c,d', 'a,d', 'd', 10, nil], :nominal)
|
262
|
+
assert_equal([%w(a), %w(a b), %w(c d), %w(a d), %w(d), [10], nil], a.splitted)
|
270
263
|
end
|
271
|
-
should
|
272
|
-
a = [1,2,3].to_scale
|
273
|
-
assert_equal([5,10,15].to_scale, a*5)
|
264
|
+
should 'multiply correct for scalar' do
|
265
|
+
a = [1, 2, 3].to_scale
|
266
|
+
assert_equal([5, 10, 15].to_scale, a * 5)
|
274
267
|
end
|
275
|
-
should
|
276
|
-
a = [1,2,3].to_scale
|
277
|
-
b = [2,4,6].to_scale
|
268
|
+
should 'multiply correct with other vector' do
|
269
|
+
a = [1, 2, 3].to_scale
|
270
|
+
b = [2, 4, 6].to_scale
|
278
271
|
|
279
|
-
assert_equal([2,8,18].to_scale, a*b)
|
272
|
+
assert_equal([2, 8, 18].to_scale, a * b)
|
280
273
|
end
|
281
|
-
should
|
282
|
-
a = [1,2,3].to_scale
|
283
|
-
assert_equal([11,12,13].to_scale, a+10)
|
274
|
+
should 'sum correct for scalar' do
|
275
|
+
a = [1, 2, 3].to_scale
|
276
|
+
assert_equal([11, 12, 13].to_scale, a + 10)
|
284
277
|
end
|
285
278
|
|
286
|
-
should
|
287
|
-
@c.type
|
279
|
+
should 'raise NoMethodError when method requires ordinal and vector is nominal' do
|
280
|
+
@c.type = :nominal
|
288
281
|
assert_raise(::NoMethodError) { @c.median }
|
289
282
|
end
|
290
283
|
|
291
|
-
should
|
292
|
-
@c.type
|
284
|
+
should 'raise NoMethodError when method requires scalar and vector is ordinal' do
|
285
|
+
@c.type = :ordinal
|
293
286
|
assert_raise(::NoMethodError) { @c.mean }
|
294
287
|
end
|
295
|
-
should
|
288
|
+
should 'jacknife correctly with named method' do
|
296
289
|
# First example
|
297
|
-
a=[1,2,3,4].to_scale
|
298
|
-
ds=a.jacknife(:mean)
|
290
|
+
a = [1, 2, 3, 4].to_scale
|
291
|
+
ds = a.jacknife(:mean)
|
299
292
|
assert_equal(a.mean, ds[:mean].mean)
|
300
|
-
ds=a.jacknife([:mean
|
293
|
+
ds = a.jacknife([:mean, :sd])
|
301
294
|
assert_equal(a.mean, ds[:mean].mean)
|
302
295
|
assert_equal(a.sd, ds[:mean].sd)
|
303
296
|
end
|
304
|
-
should
|
297
|
+
should 'jacknife correctly with custom method' do
|
305
298
|
# Second example
|
306
|
-
a=[17.23, 18.71,13.93,18.81,15.78,11.29,14.91,13.39, 18.21, 11.57, 14.28, 10.94, 18.83, 15.52,13.45,15.25].to_scale
|
307
|
-
ds=a.jacknife(:
|
308
|
-
exp=[1.605, 2.972, 1.151, 3.097, 0.998, 3.308, 0.942, 1.393, 2.416, 2.951, 1.043, 3.806, 3.122, 0.958, 1.362, 0.937].to_scale
|
299
|
+
a = [17.23, 18.71, 13.93, 18.81, 15.78, 11.29, 14.91, 13.39, 18.21, 11.57, 14.28, 10.94, 18.83, 15.52, 13.45, 15.25].to_scale
|
300
|
+
ds = a.jacknife(log_s2: ->(v) { Math.log(v.variance) })
|
301
|
+
exp = [1.605, 2.972, 1.151, 3.097, 0.998, 3.308, 0.942, 1.393, 2.416, 2.951, 1.043, 3.806, 3.122, 0.958, 1.362, 0.937].to_scale
|
309
302
|
|
310
303
|
assert_similar_vector(exp, ds[:log_s2], 0.001)
|
311
304
|
assert_in_delta(2.00389, ds[:log_s2].mean, 0.00001)
|
312
305
|
assert_in_delta(1.091, ds[:log_s2].variance, 0.001)
|
313
306
|
end
|
314
|
-
should
|
315
|
-
a=rnorm(6)
|
316
|
-
ds=a.jacknife(:mean,2)
|
317
|
-
mean=a.mean
|
318
|
-
exp=[3*mean-2*(a[2]+a[3]+a[4]+a[5]) / 4, 3*mean-2*(a[0]+a[1]+a[4]+a[5]) / 4, 3*mean-2*(a[0]+a[1]+a[2]+a[3]) / 4].to_scale
|
307
|
+
should 'jacknife correctly with k>1' do
|
308
|
+
a = rnorm(6)
|
309
|
+
ds = a.jacknife(:mean, 2)
|
310
|
+
mean = a.mean
|
311
|
+
exp = [3 * mean - 2 * (a[2] + a[3] + a[4] + a[5]) / 4, 3 * mean - 2 * (a[0] + a[1] + a[4] + a[5]) / 4, 3 * mean - 2 * (a[0] + a[1] + a[2] + a[3]) / 4].to_scale
|
319
312
|
assert_similar_vector(exp, ds[:mean], 1e-13)
|
320
313
|
end
|
321
|
-
should
|
322
|
-
a=rnorm(100)
|
323
|
-
ds=a.bootstrap([:mean
|
324
|
-
se=1/Math.sqrt(a.size)
|
314
|
+
should 'bootstrap should return a vector with mean=mu and sd=se' do
|
315
|
+
a = rnorm(100)
|
316
|
+
ds = a.bootstrap([:mean, :sd], 200)
|
317
|
+
se = 1 / Math.sqrt(a.size)
|
325
318
|
assert_in_delta(0, ds[:mean].mean, 0.3)
|
326
319
|
assert_in_delta(se, ds[:mean].sd, 0.02)
|
327
320
|
end
|
328
|
-
|
329
|
-
|
330
321
|
end
|
331
322
|
|
332
|
-
|
333
|
-
|
334
323
|
def test_nominal
|
335
|
-
assert_equal(@c[1],5)
|
336
|
-
assert_equal({ 1=>1,2=>1,3=>1,4=>1,5=>5,6=>2,7=>1,8=>1, 9=>1,10=>1}
|
337
|
-
assert_equal({ 1=>1,2=>1,3=>1,4=>1,5=>5,6=>2,7=>1,8=>1, 9=>1,10=>1}
|
338
|
-
assert_equal({ 1 => 1.quo(15)
|
324
|
+
assert_equal(@c[1], 5)
|
325
|
+
assert_equal({ 1 => 1, 2 => 1, 3 => 1, 4 => 1, 5 => 5, 6 => 2, 7 => 1, 8 => 1, 9 => 1, 10 => 1 }, @c.frequencies)
|
326
|
+
assert_equal({ 1 => 1, 2 => 1, 3 => 1, 4 => 1, 5 => 5, 6 => 2, 7 => 1, 8 => 1, 9 => 1, 10 => 1 }, @c._frequencies)
|
327
|
+
assert_equal({ 1 => 1.quo(15), 2 => 1.quo(15), 3 => 1.quo(15), 4 => 1.quo(15), 5 => 5.quo(15), 6 => 2.quo(15), 7 => 1.quo(15), 8 => 1.quo(15), 9 => 1.quo(15), 10 => 1.quo(15) }, @c.proportions)
|
339
328
|
assert_equal(@c.proportion, 1.quo(15))
|
340
329
|
assert_equal(@c.proportion(2), 1.quo(15))
|
341
|
-
assert_equal([1,2,3,4,5,6,7,8,9,10], @c.factors.sort)
|
342
|
-
assert_equal(@c.mode,5)
|
343
|
-
assert_equal(@c.n_valid,15)
|
330
|
+
assert_equal([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], @c.factors.sort)
|
331
|
+
assert_equal(@c.mode, 5)
|
332
|
+
assert_equal(@c.n_valid, 15)
|
344
333
|
end
|
334
|
+
|
345
335
|
def test_equality
|
346
|
-
v1=[1,2,3].to_vector
|
347
|
-
v2=[1,2,3].to_vector
|
348
|
-
assert_equal(v1,v2)
|
349
|
-
v1=[1,2,3].to_vector(:nominal)
|
350
|
-
v2=[1,2,3].to_vector(:ordinal)
|
351
|
-
assert_not_equal(v1,v2)
|
352
|
-
v2=[1,2,3]
|
353
|
-
assert_not_equal(v1,v2)
|
354
|
-
v1=[1,2,3].to_vector
|
355
|
-
v2=[1,2,3].to_vector
|
356
|
-
assert_equal(v1,v2)
|
336
|
+
v1 = [1, 2, 3].to_vector
|
337
|
+
v2 = [1, 2, 3].to_vector
|
338
|
+
assert_equal(v1, v2)
|
339
|
+
v1 = [1, 2, 3].to_vector(:nominal)
|
340
|
+
v2 = [1, 2, 3].to_vector(:ordinal)
|
341
|
+
assert_not_equal(v1, v2)
|
342
|
+
v2 = [1, 2, 3]
|
343
|
+
assert_not_equal(v1, v2)
|
344
|
+
v1 = [1, 2, 3].to_vector
|
345
|
+
v2 = [1, 2, 3].to_vector
|
346
|
+
assert_equal(v1, v2)
|
357
347
|
assert_equal(false, v1 == Object.new)
|
358
348
|
end
|
349
|
+
|
359
350
|
def test_vector_percentil
|
360
|
-
a=[1,2,2,3,4,5,5,5,6,10].to_scale
|
361
|
-
expected=[10,25,25,40,50,70,70,70,90,100].to_scale
|
351
|
+
a = [1, 2, 2, 3, 4, 5, 5, 5, 6, 10].to_scale
|
352
|
+
expected = [10, 25, 25, 40, 50, 70, 70, 70, 90, 100].to_scale
|
362
353
|
assert_equal(expected, a.vector_percentil)
|
363
|
-
a=[1,nil,nil,2,2,3,4,nil,nil,5,5,5,6,10].to_scale
|
364
|
-
expected=[10,nil,nil,25,25,40,50,nil,nil,70,70,70,90,100].to_scale
|
354
|
+
a = [1, nil, nil, 2, 2, 3, 4, nil, nil, 5, 5, 5, 6, 10].to_scale
|
355
|
+
expected = [10, nil, nil, 25, 25, 40, 50, nil, nil, 70, 70, 70, 90, 100].to_scale
|
365
356
|
assert_equal(expected, a.vector_percentil)
|
366
357
|
end
|
358
|
+
|
367
359
|
def test_ordinal
|
368
|
-
@c.type
|
369
|
-
assert_equal(5
|
370
|
-
assert_equal(4
|
371
|
-
assert_equal(7
|
372
|
-
|
373
|
-
v=[
|
374
|
-
assert_equal(
|
375
|
-
a=[7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 9.0, 9.0, 10.0, 10.0, 10.0, 10.0, 10.0, 12.0, 12.0, 13.0, 14.0, 14.0, 2.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0].to_scale
|
360
|
+
@c.type = :ordinal
|
361
|
+
assert_equal(5, @c.median)
|
362
|
+
assert_equal(4, @c.percentil(25))
|
363
|
+
assert_equal(7, @c.percentil(75))
|
364
|
+
|
365
|
+
v = [200_000, 200_000, 210_000, 220_000, 230_000, 250_000, 250_000, 250_000, 270_000, 300_000, 450_000, 130_000, 140_000, 140_000, 140_000, 145_000, 148_000, 165_000, 170_000, 180_000, 180_000, 180_000, 180_000, 180_000, 180_000].to_scale
|
366
|
+
assert_equal(180_000, v.median)
|
367
|
+
a = [7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 9.0, 9.0, 10.0, 10.0, 10.0, 10.0, 10.0, 12.0, 12.0, 13.0, 14.0, 14.0, 2.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0].to_scale
|
376
368
|
assert_equal(4.5, a.percentil(25))
|
377
369
|
assert_equal(6.5, a.percentil(50))
|
378
370
|
assert_equal(9.5, a.percentil(75))
|
379
371
|
assert_equal(3.0, a.percentil(10))
|
380
372
|
end
|
373
|
+
|
381
374
|
def test_linear_percentil_strategy
|
382
375
|
values = [102, 104, 105, 107, 108, 109, 110, 112, 115, 116].shuffle.to_scale
|
383
376
|
assert_equal 102, values.percentil(0, :linear)
|
@@ -393,252 +386,268 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
393
386
|
assert_equal 115, values.percentil(75, :linear)
|
394
387
|
assert_equal 118, values.percentil(100, :linear)
|
395
388
|
end
|
389
|
+
|
396
390
|
def test_ranked
|
397
|
-
v1=[0.8,1.2,1.2,2.3,18].to_vector(:ordinal)
|
398
|
-
expected=[1,2.5,2.5,4,5].to_vector(:ordinal)
|
399
|
-
assert_equal(expected,v1.ranked)
|
400
|
-
v1=[nil,0.8,1.2,1.2,2.3,18,nil].to_vector(:ordinal)
|
401
|
-
expected=[nil,1,2.5,2.5,4,5,nil].to_vector(:ordinal)
|
402
|
-
assert_equal(expected,v1.ranked)
|
391
|
+
v1 = [0.8, 1.2, 1.2, 2.3, 18].to_vector(:ordinal)
|
392
|
+
expected = [1, 2.5, 2.5, 4, 5].to_vector(:ordinal)
|
393
|
+
assert_equal(expected, v1.ranked)
|
394
|
+
v1 = [nil, 0.8, 1.2, 1.2, 2.3, 18, nil].to_vector(:ordinal)
|
395
|
+
expected = [nil, 1, 2.5, 2.5, 4, 5, nil].to_vector(:ordinal)
|
396
|
+
assert_equal(expected, v1.ranked)
|
403
397
|
end
|
398
|
+
|
404
399
|
def test_scale
|
405
|
-
a=Statsample::Vector.new([1,2,3,4,
|
400
|
+
a = Statsample::Vector.new([1, 2, 3, 4, 'STRING'], :scale)
|
406
401
|
assert_equal(10, a.sum)
|
407
|
-
i=0
|
408
|
-
factors=a.factors.sort
|
409
|
-
[0,1,2,3,4].each{|v|
|
410
|
-
assert(v==factors[i])
|
411
|
-
assert(v.class==factors[i].class,"#{v} - #{v.class} != #{factors[i]} - #{factors[i].class}")
|
412
|
-
i+=1
|
402
|
+
i = 0
|
403
|
+
factors = a.factors.sort
|
404
|
+
[0, 1, 2, 3, 4].each{|v|
|
405
|
+
assert(v == factors[i])
|
406
|
+
assert(v.class == factors[i].class, "#{v} - #{v.class} != #{factors[i]} - #{factors[i].class}")
|
407
|
+
i += 1
|
413
408
|
}
|
414
409
|
end
|
410
|
+
|
415
411
|
def test_vector_centered
|
416
|
-
mean=rand
|
417
|
-
samples=11
|
418
|
-
centered=samples.times.map {|i| i-((samples/2).floor).to_i}.to_scale
|
419
|
-
not_centered=centered.recode {|v| v+mean}
|
420
|
-
obs=not_centered.centered
|
421
|
-
centered.each_with_index do |v,i|
|
422
|
-
assert_in_delta(v,obs[i],0.0001)
|
412
|
+
mean = rand
|
413
|
+
samples = 11
|
414
|
+
centered = samples.times.map { |i| i - ((samples / 2).floor).to_i }.to_scale
|
415
|
+
not_centered = centered.recode { |v| v + mean }
|
416
|
+
obs = not_centered.centered
|
417
|
+
centered.each_with_index do |v, i|
|
418
|
+
assert_in_delta(v, obs[i], 0.0001)
|
423
419
|
end
|
424
420
|
end
|
421
|
+
|
425
422
|
def test_vector_standarized
|
426
|
-
v1=[1,2,3,4,nil].to_vector(:scale)
|
427
|
-
sds=v1.sds
|
428
|
-
expected=[((1-2.5).quo(sds)),((2-2.5).quo(sds)),((3-2.5).quo(sds)),((4-2.5).quo(sds)), nil].to_vector(:scale)
|
429
|
-
vs=v1.vector_standarized
|
423
|
+
v1 = [1, 2, 3, 4, nil].to_vector(:scale)
|
424
|
+
sds = v1.sds
|
425
|
+
expected = [((1 - 2.5).quo(sds)), ((2 - 2.5).quo(sds)), ((3 - 2.5).quo(sds)), ((4 - 2.5).quo(sds)), nil].to_vector(:scale)
|
426
|
+
vs = v1.vector_standarized
|
430
427
|
assert_equal(expected, vs)
|
431
|
-
assert_equal(0,vs.mean)
|
432
|
-
assert_equal(1,vs.sds)
|
428
|
+
assert_equal(0, vs.mean)
|
429
|
+
assert_equal(1, vs.sds)
|
433
430
|
end
|
434
431
|
|
435
432
|
def test_vector_standarized_with_zero_variance
|
436
|
-
v1=100.times.map {|
|
437
|
-
exp=100.times.map {nil}.to_scale
|
438
|
-
assert_equal(exp,v1.standarized)
|
433
|
+
v1 = 100.times.map { |_i| 1 }.to_scale
|
434
|
+
exp = 100.times.map { nil }.to_scale
|
435
|
+
assert_equal(exp, v1.standarized)
|
439
436
|
end
|
440
437
|
|
441
|
-
|
442
|
-
v=Statsample::Vector.new
|
443
|
-
v.type
|
444
|
-
assert_raise(NoMethodError) { v.check_type(:scale)}
|
445
|
-
assert_raise(NoMethodError) { v.check_type(:ordinal)}
|
438
|
+
def test_check_type
|
439
|
+
v = Statsample::Vector.new
|
440
|
+
v.type = :nominal
|
441
|
+
assert_raise(NoMethodError) { v.check_type(:scale) }
|
442
|
+
assert_raise(NoMethodError) { v.check_type(:ordinal) }
|
446
443
|
assert(v.check_type(:nominal).nil?)
|
447
444
|
|
448
|
-
v.type
|
445
|
+
v.type = :ordinal
|
449
446
|
|
450
|
-
assert_raise(NoMethodError) { v.check_type(:scale)}
|
447
|
+
assert_raise(NoMethodError) { v.check_type(:scale) }
|
451
448
|
|
452
449
|
assert(v.check_type(:ordinal).nil?)
|
453
450
|
assert(v.check_type(:nominal).nil?)
|
454
451
|
|
455
|
-
|
456
|
-
v.type=:scale
|
452
|
+
v.type = :scale
|
457
453
|
assert(v.check_type(:scale).nil?)
|
458
454
|
assert(v.check_type(:ordinal).nil?)
|
459
455
|
assert(v.check_type(:nominal).nil?)
|
460
456
|
|
461
|
-
v.type
|
462
|
-
assert_raise(NoMethodError) { v.check_type(:scale)}
|
463
|
-
assert_raise(NoMethodError) { v.check_type(:ordinal)}
|
464
|
-
assert_raise(NoMethodError) { v.check_type(:nominal)}
|
465
|
-
|
457
|
+
v.type = :date
|
458
|
+
assert_raise(NoMethodError) { v.check_type(:scale) }
|
459
|
+
assert_raise(NoMethodError) { v.check_type(:ordinal) }
|
460
|
+
assert_raise(NoMethodError) { v.check_type(:nominal) }
|
461
|
+
end
|
466
462
|
|
467
463
|
def test_add
|
468
|
-
a=Statsample::Vector.new([1,2,3,4,5], :scale)
|
469
|
-
b=Statsample::Vector.new([11,12,13,14,15], :scale)
|
470
|
-
assert_equal([3,4,5,6,7], (a+2).to_a)
|
471
|
-
assert_equal([12,14,16,18,20], (a+b).to_a)
|
472
|
-
assert_raise
|
464
|
+
a = Statsample::Vector.new([1, 2, 3, 4, 5], :scale)
|
465
|
+
b = Statsample::Vector.new([11, 12, 13, 14, 15], :scale)
|
466
|
+
assert_equal([3, 4, 5, 6, 7], (a + 2).to_a)
|
467
|
+
assert_equal([12, 14, 16, 18, 20], (a + b).to_a)
|
468
|
+
assert_raise ArgumentError do
|
473
469
|
a + @c
|
474
470
|
end
|
475
|
-
assert_raise
|
476
|
-
a+
|
471
|
+
assert_raise TypeError do
|
472
|
+
a + 'string'
|
477
473
|
end
|
478
|
-
a=Statsample::Vector.new([nil,1, 2
|
479
|
-
b=Statsample::Vector.new([11, 12,nil,13,14,15], :scale)
|
480
|
-
assert_equal([nil,13,nil,16,18,20], (a+b).to_a)
|
481
|
-
assert_equal([nil,13,nil,16,18,20], (a+b.to_a).to_a)
|
474
|
+
a = Statsample::Vector.new([nil, 1, 2, 3, 4, 5], :scale)
|
475
|
+
b = Statsample::Vector.new([11, 12, nil, 13, 14, 15], :scale)
|
476
|
+
assert_equal([nil, 13, nil, 16, 18, 20], (a + b).to_a)
|
477
|
+
assert_equal([nil, 13, nil, 16, 18, 20], (a + b.to_a).to_a)
|
482
478
|
end
|
479
|
+
|
483
480
|
def test_minus
|
484
|
-
a=Statsample::Vector.new([1,2,3,4,5], :scale)
|
485
|
-
b=Statsample::Vector.new([11,12,13,14,15], :scale)
|
486
|
-
assert_equal([-1,0,1,2,3], (a-2).to_a)
|
487
|
-
assert_equal([10,10,10,10,10], (b-a).to_a)
|
488
|
-
assert_raise
|
489
|
-
a
|
490
|
-
end
|
491
|
-
assert_raise
|
492
|
-
a-
|
493
|
-
end
|
494
|
-
a=Statsample::Vector.new([nil,1, 2
|
495
|
-
b=Statsample::Vector.new([11, 12,nil,13,14,15], :scale)
|
496
|
-
assert_equal([nil,11,nil,10,10,10], (b-a).to_a)
|
497
|
-
assert_equal([nil,11,nil,10,10,10], (b-a.to_a).to_a)
|
481
|
+
a = Statsample::Vector.new([1, 2, 3, 4, 5], :scale)
|
482
|
+
b = Statsample::Vector.new([11, 12, 13, 14, 15], :scale)
|
483
|
+
assert_equal([-1, 0, 1, 2, 3], (a - 2).to_a)
|
484
|
+
assert_equal([10, 10, 10, 10, 10], (b - a).to_a)
|
485
|
+
assert_raise ArgumentError do
|
486
|
+
a - @c
|
487
|
+
end
|
488
|
+
assert_raise TypeError do
|
489
|
+
a - 'string'
|
490
|
+
end
|
491
|
+
a = Statsample::Vector.new([nil, 1, 2, 3, 4, 5], :scale)
|
492
|
+
b = Statsample::Vector.new([11, 12, nil, 13, 14, 15], :scale)
|
493
|
+
assert_equal([nil, 11, nil, 10, 10, 10], (b - a).to_a)
|
494
|
+
assert_equal([nil, 11, nil, 10, 10, 10], (b - a.to_a).to_a)
|
498
495
|
end
|
496
|
+
|
499
497
|
def test_sum_of_squares
|
500
|
-
a=[1,2,3,4,5,6].to_vector(:scale)
|
498
|
+
a = [1, 2, 3, 4, 5, 6].to_vector(:scale)
|
501
499
|
assert_equal(17.5, a.sum_of_squared_deviation)
|
502
500
|
end
|
501
|
+
|
503
502
|
def test_average_deviation
|
504
|
-
a=[1,2,3,4,5,6,7,8,9].to_scale
|
503
|
+
a = [1, 2, 3, 4, 5, 6, 7, 8, 9].to_scale
|
505
504
|
assert_equal(20.quo(9), a.average_deviation_population)
|
506
505
|
end
|
506
|
+
|
507
507
|
def test_samples
|
508
508
|
srand(1)
|
509
|
-
assert_equal(100
|
509
|
+
assert_equal(100, @c.sample_with_replacement(100).size)
|
510
510
|
assert_equal(@c.valid_data.to_a.sort, @c.sample_without_replacement(15).sort)
|
511
|
-
assert_raise
|
511
|
+
assert_raise ArgumentError do
|
512
512
|
@c.sample_without_replacement(20)
|
513
513
|
end
|
514
|
-
@c.type
|
514
|
+
@c.type = :scale
|
515
515
|
srand(1)
|
516
516
|
assert_equal(100, @c.sample_with_replacement(100).size)
|
517
517
|
assert_equal(@c.valid_data.to_a.sort, @c.sample_without_replacement(15).sort)
|
518
|
-
|
519
518
|
end
|
519
|
+
|
520
520
|
def test_valid_data
|
521
|
-
a=Statsample::Vector.new([1,2,3,4,
|
522
|
-
a.missing_values=[-99]
|
523
|
-
a.add(1,false)
|
524
|
-
a.add(2,false)
|
525
|
-
a.add(-99,false)
|
521
|
+
a = Statsample::Vector.new([1, 2, 3, 4, 'STRING'])
|
522
|
+
a.missing_values = [-99]
|
523
|
+
a.add(1, false)
|
524
|
+
a.add(2, false)
|
525
|
+
a.add(-99, false)
|
526
526
|
a.set_valid_data
|
527
|
-
exp_valid_data=[1,2,3,4,
|
528
|
-
assert_equal(exp_valid_data,a.valid_data)
|
529
|
-
a.add(20,false)
|
530
|
-
a.add(30,false)
|
531
|
-
assert_equal(exp_valid_data,a.valid_data)
|
527
|
+
exp_valid_data = [1, 2, 3, 4, 'STRING', 1, 2]
|
528
|
+
assert_equal(exp_valid_data, a.valid_data)
|
529
|
+
a.add(20, false)
|
530
|
+
a.add(30, false)
|
531
|
+
assert_equal(exp_valid_data, a.valid_data)
|
532
532
|
a.set_valid_data
|
533
|
-
exp_valid_data_2=[1,2,3,4,
|
534
|
-
assert_equal(exp_valid_data_2,a.valid_data)
|
533
|
+
exp_valid_data_2 = [1, 2, 3, 4, 'STRING', 1, 2, 20, 30]
|
534
|
+
assert_equal(exp_valid_data_2, a.valid_data)
|
535
535
|
end
|
536
|
+
|
536
537
|
def test_set_value
|
537
|
-
@c[2]=10
|
538
|
-
expected=[5,5,10,5,5,6,6,7,8,9,10,1,2,3,4,nil
|
539
|
-
assert_equal(expected.data
|
538
|
+
@c[2] = 10
|
539
|
+
expected = [5, 5, 10, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99].to_vector
|
540
|
+
assert_equal(expected.data, @c.data)
|
540
541
|
end
|
542
|
+
|
541
543
|
def test_gsl
|
542
544
|
if Statsample.has_gsl?
|
543
|
-
a=Statsample::Vector.new([1,2,3,4,
|
545
|
+
a = Statsample::Vector.new([1, 2, 3, 4, 'STRING'], :scale)
|
544
546
|
|
545
|
-
assert_equal(2,a.mean)
|
546
|
-
assert_equal(a.variance_sample_ruby,a.variance_sample)
|
547
|
-
assert_equal(a.standard_deviation_sample_ruby,a.sds)
|
548
|
-
assert_equal(a.variance_population_ruby,a.variance_population)
|
549
|
-
assert_equal(a.standard_deviation_population_ruby,a.standard_deviation_population)
|
547
|
+
assert_equal(2, a.mean)
|
548
|
+
assert_equal(a.variance_sample_ruby, a.variance_sample)
|
549
|
+
assert_equal(a.standard_deviation_sample_ruby, a.sds)
|
550
|
+
assert_equal(a.variance_population_ruby, a.variance_population)
|
551
|
+
assert_equal(a.standard_deviation_population_ruby, a.standard_deviation_population)
|
550
552
|
assert_nothing_raised do
|
551
|
-
a=[].to_vector(:scale)
|
553
|
+
a = [].to_vector(:scale)
|
552
554
|
end
|
553
|
-
a.add(1,false)
|
554
|
-
a.add(2,false)
|
555
|
+
a.add(1, false)
|
556
|
+
a.add(2, false)
|
555
557
|
a.set_valid_data
|
556
|
-
assert_equal(3,a.sum)
|
557
|
-
b=[1,2,nil,3,4,5,nil,6].to_vector(:scale)
|
558
|
+
assert_equal(3, a.sum)
|
559
|
+
b = [1, 2, nil, 3, 4, 5, nil, 6].to_vector(:scale)
|
558
560
|
assert_equal(21, b.sum)
|
559
561
|
assert_equal(3.5, b.mean)
|
560
|
-
assert_equal(6,b.gsl.size)
|
561
|
-
c=[10,20,30,40,50,100,1000,2000,5000].to_scale
|
562
|
-
assert_in_delta(c.skew, c.skew_ruby
|
563
|
-
assert_in_delta(c.kurtosis, c.kurtosis_ruby
|
562
|
+
assert_equal(6, b.gsl.size)
|
563
|
+
c = [10, 20, 30, 40, 50, 100, 1000, 2000, 5000].to_scale
|
564
|
+
assert_in_delta(c.skew, c.skew_ruby, 0.0001)
|
565
|
+
assert_in_delta(c.kurtosis, c.kurtosis_ruby, 0.0001)
|
564
566
|
end
|
565
567
|
end
|
568
|
+
|
566
569
|
def test_vector_matrix
|
567
|
-
v1
|
568
|
-
v2
|
569
|
-
v3
|
570
|
-
ex=Matrix.rows([
|
571
|
-
assert_equal(ex,Statsample.vector_cols_matrix(v1,v2,v3))
|
570
|
+
v1 = %w(a a a b b b c c).to_vector
|
571
|
+
v2 = %w(1 3 4 5 6 4 3 2).to_vector
|
572
|
+
v3 = %w(1 0 0 0 1 1 1 0).to_vector
|
573
|
+
ex = Matrix.rows([%w(a 1 1), %w(a 3 0), %w(a 4 0), %w(b 5 0), %w(b 6 1), %w(b 4 1), %w(c 3 1), %w(c 2 0)])
|
574
|
+
assert_equal(ex, Statsample.vector_cols_matrix(v1, v2, v3))
|
572
575
|
end
|
576
|
+
|
573
577
|
def test_marshalling
|
574
|
-
v1=(0..100).to_a.collect{|
|
575
|
-
v2=Marshal.load(Marshal.dump(v1))
|
576
|
-
assert_equal(v1,v2)
|
578
|
+
v1 = (0..100).to_a.collect { |_n| rand(100) }.to_vector(:scale)
|
579
|
+
v2 = Marshal.load(Marshal.dump(v1))
|
580
|
+
assert_equal(v1, v2)
|
577
581
|
end
|
582
|
+
|
578
583
|
def test_dup
|
579
|
-
v1
|
580
|
-
v2=v1.dup
|
581
|
-
assert_equal(v1.data,v2.data)
|
582
|
-
assert_not_same(v1.data,v2.data)
|
583
|
-
assert_equal(v1.type,v2.type)
|
584
|
-
|
585
|
-
v1.type
|
586
|
-
assert_not_equal(v1.type,v2.type)
|
587
|
-
assert_equal(v1.missing_values,v2.missing_values)
|
588
|
-
assert_not_same(v1.missing_values,v2.missing_values)
|
589
|
-
assert_equal(v1.labels,v2.labels)
|
590
|
-
assert_not_same(v1.labels,v2.labels)
|
591
|
-
|
592
|
-
v3=v1.dup_empty
|
593
|
-
assert_equal([],v3.data)
|
594
|
-
assert_not_equal(v1.data,v3.data)
|
595
|
-
assert_not_same(v1.data,v3.data)
|
596
|
-
assert_equal(v1.type,v3.type)
|
597
|
-
v1.type
|
598
|
-
v3.type
|
599
|
-
assert_not_equal(v1.type,v3.type)
|
600
|
-
assert_equal(v1.missing_values,v3.missing_values)
|
601
|
-
assert_not_same(v1.missing_values,v3.missing_values)
|
602
|
-
assert_equal(v1.labels,v3.labels)
|
603
|
-
assert_not_same(v1.labels,v3.labels)
|
584
|
+
v1 = %w(a a a b b b c c).to_vector
|
585
|
+
v2 = v1.dup
|
586
|
+
assert_equal(v1.data, v2.data)
|
587
|
+
assert_not_same(v1.data, v2.data)
|
588
|
+
assert_equal(v1.type, v2.type)
|
589
|
+
|
590
|
+
v1.type = :ordinal
|
591
|
+
assert_not_equal(v1.type, v2.type)
|
592
|
+
assert_equal(v1.missing_values, v2.missing_values)
|
593
|
+
assert_not_same(v1.missing_values, v2.missing_values)
|
594
|
+
assert_equal(v1.labels, v2.labels)
|
595
|
+
assert_not_same(v1.labels, v2.labels)
|
596
|
+
|
597
|
+
v3 = v1.dup_empty
|
598
|
+
assert_equal([], v3.data)
|
599
|
+
assert_not_equal(v1.data, v3.data)
|
600
|
+
assert_not_same(v1.data, v3.data)
|
601
|
+
assert_equal(v1.type, v3.type)
|
602
|
+
v1.type = :ordinal
|
603
|
+
v3.type = :nominal
|
604
|
+
assert_not_equal(v1.type, v3.type)
|
605
|
+
assert_equal(v1.missing_values, v3.missing_values)
|
606
|
+
assert_not_same(v1.missing_values, v3.missing_values)
|
607
|
+
assert_equal(v1.labels, v3.labels)
|
608
|
+
assert_not_same(v1.labels, v3.labels)
|
604
609
|
end
|
610
|
+
|
605
611
|
def test_paired_ties
|
606
|
-
a=[0,0,0,1,1,2,3,3,4,4,4].to_vector(:ordinal)
|
607
|
-
expected=[2,2,2,4.5,4.5,6,7.5,7.5,10,10,10].to_vector(:ordinal)
|
608
|
-
assert_equal(expected,a.ranked)
|
612
|
+
a = [0, 0, 0, 1, 1, 2, 3, 3, 4, 4, 4].to_vector(:ordinal)
|
613
|
+
expected = [2, 2, 2, 4.5, 4.5, 6, 7.5, 7.5, 10, 10, 10].to_vector(:ordinal)
|
614
|
+
assert_equal(expected, a.ranked)
|
609
615
|
end
|
616
|
+
|
610
617
|
def test_dichotomize
|
611
|
-
a=
|
612
|
-
exp=[0,0,0,1,1,1,nil].to_scale
|
613
|
-
assert_equal(exp,a.dichotomize)
|
614
|
-
a=
|
615
|
-
exp=[0,0,0,1,1,1,1].to_scale
|
616
|
-
assert_equal(exp,a.dichotomize)
|
617
|
-
a=
|
618
|
-
exp=[0,0,0,0,1,1,nil].to_scale
|
619
|
-
assert_equal(exp,a.dichotomize(1))
|
620
|
-
a= %w
|
621
|
-
exp=[0,0,0,1,1,1].to_scale
|
618
|
+
a = [0, 0, 0, 1, 2, 3, nil].to_vector
|
619
|
+
exp = [0, 0, 0, 1, 1, 1, nil].to_scale
|
620
|
+
assert_equal(exp, a.dichotomize)
|
621
|
+
a = [1, 1, 1, 2, 2, 2, 3].to_vector
|
622
|
+
exp = [0, 0, 0, 1, 1, 1, 1].to_scale
|
623
|
+
assert_equal(exp, a.dichotomize)
|
624
|
+
a = [0, 0, 0, 1, 2, 3, nil].to_vector
|
625
|
+
exp = [0, 0, 0, 0, 1, 1, nil].to_scale
|
626
|
+
assert_equal(exp, a.dichotomize(1))
|
627
|
+
a = %w(a a a b c d).to_vector
|
628
|
+
exp = [0, 0, 0, 1, 1, 1].to_scale
|
622
629
|
assert_equal(exp, a.dichotomize)
|
623
630
|
end
|
631
|
+
|
624
632
|
def test_can_be_methods
|
625
|
-
a=
|
633
|
+
a = [0, 0, 0, 1, 2, 3, nil].to_vector
|
626
634
|
assert(a.can_be_scale?)
|
627
|
-
a=[0,
|
635
|
+
a = [0, 's', 0, 1, 2, 3, nil].to_vector
|
628
636
|
assert(!a.can_be_scale?)
|
629
|
-
a.missing_values=[
|
637
|
+
a.missing_values = ['s']
|
630
638
|
assert(a.can_be_scale?)
|
631
639
|
|
632
|
-
a=[Date.new(2009,10,10), Date.today
|
640
|
+
a = [Date.new(2009, 10, 10), Date.today, '2009-10-10', '2009-1-1', nil, 'NOW'].to_vector
|
633
641
|
assert(a.can_be_date?)
|
634
|
-
a=[Date.new(2009,10,10), Date.today
|
642
|
+
a = [Date.new(2009, 10, 10), Date.today, nil, 'sss'].to_vector
|
635
643
|
assert(!a.can_be_date?)
|
636
644
|
end
|
645
|
+
|
637
646
|
def test_date_vector
|
638
|
-
a=[Date.new(2009,10,10), :NOW,
|
647
|
+
a = [Date.new(2009, 10, 10), :NOW, '2009-10-10', '2009-1-1', nil, 'NOW', 'MISSING'].to_vector(:date, missing_values: ['MISSING'])
|
639
648
|
|
640
|
-
assert(a.type
|
641
|
-
expected=[Date.new(2009,10,10), Date.today
|
649
|
+
assert(a.type == :date)
|
650
|
+
expected = [Date.new(2009, 10, 10), Date.today, Date.new(2009, 10, 10), Date.new(2009, 1, 1), nil, Date.today, nil]
|
642
651
|
assert_equal(expected, a.date_data_with_nils)
|
643
652
|
end
|
644
653
|
end
|