statsample 1.4.1 → 1.4.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +4 -3
- data/History.txt +4 -0
- data/README.md +4 -0
- data/lib/statsample/converter/csv.rb +41 -54
- data/lib/statsample/converters.rb +18 -19
- data/lib/statsample/version.rb +1 -1
- data/test/fixtures/scientific_notation.csv +4 -0
- data/test/helpers_tests.rb +37 -38
- data/test/test_analysis.rb +96 -97
- data/test/test_anova_contrast.rb +22 -22
- data/test/test_anovaoneway.rb +12 -12
- data/test/test_anovatwoway.rb +16 -17
- data/test/test_anovatwowaywithdataset.rb +22 -24
- data/test/test_anovawithvectors.rb +67 -69
- data/test/test_awesome_print_bug.rb +9 -9
- data/test/test_bartlettsphericity.rb +13 -13
- data/test/test_bivariate.rb +122 -126
- data/test/test_codification.rb +51 -49
- data/test/test_crosstab.rb +44 -40
- data/test/test_csv.rb +52 -70
- data/test/test_dataset.rb +347 -330
- data/test/test_dominance_analysis.rb +22 -24
- data/test/test_factor.rb +163 -166
- data/test/test_factor_map.rb +25 -30
- data/test/test_factor_pa.rb +28 -28
- data/test/test_ggobi.rb +19 -18
- data/test/test_gsl.rb +13 -15
- data/test/test_histogram.rb +74 -77
- data/test/test_matrix.rb +29 -31
- data/test/test_multiset.rb +132 -126
- data/test/test_regression.rb +143 -149
- data/test/test_reliability.rb +149 -155
- data/test/test_reliability_icc.rb +100 -104
- data/test/test_reliability_skillscale.rb +38 -40
- data/test/test_resample.rb +14 -12
- data/test/test_rserve_extension.rb +33 -33
- data/test/test_srs.rb +5 -5
- data/test/test_statistics.rb +52 -50
- data/test/test_stest.rb +27 -28
- data/test/test_stratified.rb +10 -10
- data/test/test_test_f.rb +17 -17
- data/test/test_test_kolmogorovsmirnov.rb +21 -21
- data/test/test_test_t.rb +52 -52
- data/test/test_umannwhitney.rb +16 -16
- data/test/test_vector.rb +419 -410
- data/test/test_wilcoxonsignedrank.rb +60 -63
- data/test/test_xls.rb +41 -41
- metadata +55 -5
- data/web/Rakefile +0 -39
data/test/test_factor_pa.rb
CHANGED
@@ -1,52 +1,52 @@
|
|
1
|
-
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
|
2
|
-
#require 'rserve'
|
3
|
-
#require 'statsample/rserve_extension'
|
1
|
+
require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
2
|
+
# require 'rserve'
|
3
|
+
# require 'statsample/rserve_extension'
|
4
4
|
|
5
|
-
class StatsampleFactorTestCase <
|
5
|
+
class StatsampleFactorTestCase < Minitest::Test
|
6
6
|
include Statsample::Fixtures
|
7
7
|
# Based on Hardle and Simar
|
8
8
|
def setup
|
9
|
-
@fixtures_dir=File.expand_path(File.dirname(__FILE__)+
|
9
|
+
@fixtures_dir = File.expand_path(File.dirname(__FILE__) + '/fixtures')
|
10
10
|
end
|
11
|
+
|
11
12
|
def test_parallelanalysis_with_data
|
12
13
|
if Statsample.has_gsl?
|
13
|
-
samples=100
|
14
|
-
variables=10
|
15
|
-
iterations=50
|
14
|
+
samples = 100
|
15
|
+
variables = 10
|
16
|
+
iterations = 50
|
16
17
|
rng = Distribution::Normal.rng
|
17
|
-
f1=samples.times.collect {rng.call}.to_scale
|
18
|
-
f2=samples.times.collect {rng.call}.to_scale
|
19
|
-
vectors={}
|
18
|
+
f1 = samples.times.collect { rng.call }.to_scale
|
19
|
+
f2 = samples.times.collect { rng.call }.to_scale
|
20
|
+
vectors = {}
|
20
21
|
variables.times do |i|
|
21
|
-
if i<5
|
22
|
-
vectors["v#{i}"]=samples.times.collect {|nv|
|
23
|
-
f1[nv]*5+f2[nv]*2+rng.call
|
22
|
+
if i < 5
|
23
|
+
vectors["v#{i}"] = samples.times.collect {|nv|
|
24
|
+
f1[nv] * 5 + f2[nv] * 2 + rng.call
|
24
25
|
}.to_scale
|
25
26
|
else
|
26
|
-
vectors["v#{i}"]=samples.times.collect {|nv|
|
27
|
-
f2[nv]*5+f1[nv]*2+rng.call
|
27
|
+
vectors["v#{i}"] = samples.times.collect {|nv|
|
28
|
+
f2[nv] * 5 + f1[nv] * 2 + rng.call
|
28
29
|
}.to_scale
|
29
30
|
end
|
30
|
-
|
31
31
|
end
|
32
|
-
ds=vectors.to_dataset
|
33
|
-
|
34
|
-
pa1=Statsample::Factor::ParallelAnalysis.new(ds, :
|
35
|
-
pa2=Statsample::Factor::ParallelAnalysis.with_random_data(samples,variables
|
32
|
+
ds = vectors.to_dataset
|
33
|
+
|
34
|
+
pa1 = Statsample::Factor::ParallelAnalysis.new(ds, bootstrap_method: :data, iterations: iterations)
|
35
|
+
pa2 = Statsample::Factor::ParallelAnalysis.with_random_data(samples, variables, iterations: iterations, percentil: 95)
|
36
36
|
3.times do |n|
|
37
|
-
var="ev_0000#{n+1}"
|
38
|
-
assert_in_delta(pa1.ds_eigenvalues[var].mean, pa2.ds_eigenvalues[var].mean,0.05)
|
37
|
+
var = "ev_0000#{n + 1}"
|
38
|
+
assert_in_delta(pa1.ds_eigenvalues[var].mean, pa2.ds_eigenvalues[var].mean, 0.05)
|
39
39
|
end
|
40
40
|
else
|
41
|
-
skip(
|
41
|
+
skip('Too slow without GSL')
|
42
42
|
end
|
43
|
-
|
44
43
|
end
|
44
|
+
|
45
45
|
def test_parallelanalysis
|
46
|
-
pa=Statsample::Factor::ParallelAnalysis.with_random_data(305,8
|
46
|
+
pa = Statsample::Factor::ParallelAnalysis.with_random_data(305, 8, iterations: 100, percentil: 95)
|
47
47
|
assert_in_delta(1.2454, pa.ds_eigenvalues['ev_00001'].mean, 0.01)
|
48
48
|
assert_in_delta(1.1542, pa.ds_eigenvalues['ev_00002'].mean, 0.01)
|
49
49
|
assert_in_delta(1.0836, pa.ds_eigenvalues['ev_00003'].mean, 0.01)
|
50
|
-
assert(pa.summary.size>0)
|
51
|
-
end
|
50
|
+
assert(pa.summary.size > 0)
|
51
|
+
end
|
52
52
|
end
|
data/test/test_ggobi.rb
CHANGED
@@ -1,24 +1,25 @@
|
|
1
|
-
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
|
1
|
+
require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
2
2
|
require 'ostruct'
|
3
|
-
class StatsampleGGobiTestCase <
|
4
|
-
|
3
|
+
class StatsampleGGobiTestCase < Minitest::Test
|
5
4
|
def setup
|
6
|
-
v1=([10.2,20.3,10,20,30,40,30,20,30,40]*10).to_vector(:scale)
|
7
|
-
@v2=(%w
|
8
|
-
@v2.labels={
|
9
|
-
v3=([1,2,3,4,5,4,3,2,1,2]*10).to_vector(:ordinal)
|
10
|
-
@ds={'v1'=>v1,'v2'
|
5
|
+
v1 = ([10.2, 20.3, 10, 20, 30, 40, 30, 20, 30, 40] * 10).to_vector(:scale)
|
6
|
+
@v2 = (%w(a b c a a a b b c d) * 10).to_vector(:nominal)
|
7
|
+
@v2.labels = { 'a' => 'letter a', 'd' => 'letter d' }
|
8
|
+
v3 = ([1, 2, 3, 4, 5, 4, 3, 2, 1, 2] * 10).to_vector(:ordinal)
|
9
|
+
@ds = { 'v1' => v1, 'v2' => @v2, 'v3' => v3 }.to_dataset
|
11
10
|
end
|
11
|
+
|
12
12
|
def test_values_definition
|
13
|
-
a=[1.0,2,
|
14
|
-
assert_equal(
|
13
|
+
a = [1.0, 2, 'a', nil]
|
14
|
+
assert_equal('1.0 2 a NA', Statsample::GGobi.values_definition(a, 'NA'))
|
15
15
|
end
|
16
|
+
|
16
17
|
def test_variable_definition
|
17
|
-
carrier=OpenStruct.new
|
18
|
-
carrier.categorials=[]
|
19
|
-
carrier.conversions={}
|
20
|
-
real_var_definition=Statsample::GGobi.variable_definition(carrier
|
21
|
-
|
18
|
+
carrier = OpenStruct.new
|
19
|
+
carrier.categorials = []
|
20
|
+
carrier.conversions = {}
|
21
|
+
real_var_definition = Statsample::GGobi.variable_definition(carrier, @v2, 'variable 2', 'v2')
|
22
|
+
expected = <<-EOS
|
22
23
|
<categoricalvariable name="variable 2" nickname="v2">
|
23
24
|
<levels count="4">
|
24
25
|
<level value="1">letter a</level>
|
@@ -27,8 +28,8 @@ class StatsampleGGobiTestCase < MiniTest::Unit::TestCase
|
|
27
28
|
<level value="4">letter d</level></levels>
|
28
29
|
</categoricalvariable>
|
29
30
|
EOS
|
30
|
-
assert_equal(expected.gsub(/\s/,
|
31
|
-
assert_equal({'variable 2'=>{'a'=>1,'b'=>2,'c'=>3,'d'=>4}},carrier.conversions)
|
32
|
-
assert_equal(['variable 2'],carrier.categorials)
|
31
|
+
assert_equal(expected.gsub(/\s/, ' '), real_var_definition.gsub(/\s/, ' '))
|
32
|
+
assert_equal({ 'variable 2' => { 'a' => 1, 'b' => 2, 'c' => 3, 'd' => 4 } }, carrier.conversions)
|
33
|
+
assert_equal(['variable 2'], carrier.categorials)
|
33
34
|
end
|
34
35
|
end
|
data/test/test_gsl.rb
CHANGED
@@ -1,17 +1,15 @@
|
|
1
|
-
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
|
2
|
-
class StatsampleGSLTestCase <
|
3
|
-
should_with_gsl
|
4
|
-
a=[1,2,3,4,20].to_vector(:scale)
|
5
|
-
b=[3,2,3,4,50].to_vector(:scale)
|
6
|
-
c=[6,2,3,4,3].to_vector(:scale)
|
7
|
-
ds={'a'=>a,'b'=>b,'c'=>c}.to_dataset
|
8
|
-
gsl=ds.to_matrix.to_gsl
|
9
|
-
assert_equal(5,gsl.size1)
|
10
|
-
assert_equal(3,gsl.size2)
|
11
|
-
matrix=gsl.to_matrix
|
12
|
-
assert_equal(5,matrix.row_size)
|
13
|
-
assert_equal(3,matrix.column_size)
|
1
|
+
require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
2
|
+
class StatsampleGSLTestCase < Minitest::Test
|
3
|
+
should_with_gsl 'matrix with gsl' do
|
4
|
+
a = [1, 2, 3, 4, 20].to_vector(:scale)
|
5
|
+
b = [3, 2, 3, 4, 50].to_vector(:scale)
|
6
|
+
c = [6, 2, 3, 4, 3].to_vector(:scale)
|
7
|
+
ds = { 'a' => a, 'b' => b, 'c' => c }.to_dataset
|
8
|
+
gsl = ds.to_matrix.to_gsl
|
9
|
+
assert_equal(5, gsl.size1)
|
10
|
+
assert_equal(3, gsl.size2)
|
11
|
+
matrix = gsl.to_matrix
|
12
|
+
assert_equal(5, matrix.row_size)
|
13
|
+
assert_equal(3, matrix.column_size)
|
14
14
|
end
|
15
15
|
end
|
16
|
-
|
17
|
-
|
data/test/test_histogram.rb
CHANGED
@@ -1,112 +1,109 @@
|
|
1
|
-
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
|
1
|
+
require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
2
2
|
|
3
|
-
|
4
|
-
class StatsampleHistogramTestCase < MiniTest::Unit::TestCase
|
3
|
+
class StatsampleHistogramTestCase < Minitest::Test
|
5
4
|
context Statsample::Histogram do
|
6
|
-
should
|
5
|
+
should 'alloc correctly with integer' do
|
7
6
|
h = Statsample::Histogram.alloc(4)
|
8
|
-
assert_equal([0.0]*4, h.bin)
|
9
|
-
assert_equal([0.0]*5, h.range)
|
7
|
+
assert_equal([0.0] * 4, h.bin)
|
8
|
+
assert_equal([0.0] * 5, h.range)
|
10
9
|
end
|
11
|
-
should
|
10
|
+
should 'alloc correctly with array' do
|
12
11
|
h = Statsample::Histogram.alloc([1, 3, 7, 9, 20])
|
13
|
-
assert_equal([0.0]*4, h.bin)
|
14
|
-
assert_equal([1,3,7,9,20], h.range)
|
12
|
+
assert_equal([0.0] * 4, h.bin)
|
13
|
+
assert_equal([1, 3, 7, 9, 20], h.range)
|
15
14
|
end
|
16
|
-
should
|
15
|
+
should 'alloc correctly with integer and min, max array' do
|
17
16
|
h = Statsample::Histogram.alloc(5, [0, 5])
|
18
|
-
assert_equal([0.0,1.0,2.0,3.0,4.0,5.0], h.range)
|
19
|
-
assert_equal([0.0]*5,h.bin)
|
17
|
+
assert_equal([0.0, 1.0, 2.0, 3.0, 4.0, 5.0], h.range)
|
18
|
+
assert_equal([0.0] * 5, h.bin)
|
20
19
|
end
|
21
|
-
should
|
20
|
+
should 'bin() method return correct number of bins' do
|
22
21
|
h = Statsample::Histogram.alloc(4)
|
23
|
-
assert_equal(4,h.bins)
|
22
|
+
assert_equal(4, h.bins)
|
24
23
|
end
|
25
|
-
should
|
26
|
-
h = Statsample::Histogram.alloc(5, [0, 5])
|
24
|
+
should 'increment correctly' do
|
25
|
+
h = Statsample::Histogram.alloc(5, [0, 5])
|
27
26
|
h.increment 2.5
|
28
|
-
assert_equal([0.0,0.0,1.0,0.0,0.0], h.bin)
|
29
|
-
h.increment [0.5,0.5,3.5,3.5]
|
30
|
-
assert_equal([2.0,0.0,1.0,2.0,0.0], h.bin)
|
27
|
+
assert_equal([0.0, 0.0, 1.0, 0.0, 0.0], h.bin)
|
28
|
+
h.increment [0.5, 0.5, 3.5, 3.5]
|
29
|
+
assert_equal([2.0, 0.0, 1.0, 2.0, 0.0], h.bin)
|
31
30
|
h.increment 0
|
32
|
-
assert_equal([3.0,0.0,1.0,2.0,0.0], h.bin)
|
31
|
+
assert_equal([3.0, 0.0, 1.0, 2.0, 0.0], h.bin)
|
33
32
|
h.increment 5
|
34
|
-
assert_equal([3.0,0.0,1.0,2.0,0.0], h.bin)
|
33
|
+
assert_equal([3.0, 0.0, 1.0, 2.0, 0.0], h.bin)
|
35
34
|
end
|
36
|
-
|
37
|
-
should
|
38
|
-
h = Statsample::Histogram.alloc_uniform(5,0,10)
|
39
|
-
assert_equal(5,h.bins)
|
40
|
-
assert_equal([0.0]*5,h.bin)
|
41
|
-
assert_equal([0.0,2.0,4.0,6.0,8.0,10.0], h.range)
|
35
|
+
|
36
|
+
should 'alloc_uniform correctly with n, min,max' do
|
37
|
+
h = Statsample::Histogram.alloc_uniform(5, 0, 10)
|
38
|
+
assert_equal(5, h.bins)
|
39
|
+
assert_equal([0.0] * 5, h.bin)
|
40
|
+
assert_equal([0.0, 2.0, 4.0, 6.0, 8.0, 10.0], h.range)
|
42
41
|
end
|
43
|
-
should
|
42
|
+
should 'alloc_uniform correctly with n, [min,max]' do
|
44
43
|
h = Statsample::Histogram.alloc_uniform(5, [0, 10])
|
45
|
-
assert_equal(5,h.bins)
|
46
|
-
assert_equal([0.0]*5,h.bin)
|
47
|
-
assert_equal([0.0,2.0,4.0,6.0,8.0,10.0], h.range)
|
44
|
+
assert_equal(5, h.bins)
|
45
|
+
assert_equal([0.0] * 5, h.bin)
|
46
|
+
assert_equal([0.0, 2.0, 4.0, 6.0, 8.0, 10.0], h.range)
|
48
47
|
end
|
49
|
-
should
|
50
|
-
h = Statsample::Histogram.alloc_uniform(5,2,12)
|
48
|
+
should 'get_range()' do
|
49
|
+
h = Statsample::Histogram.alloc_uniform(5, 2, 12)
|
51
50
|
5.times {|i|
|
52
|
-
assert_equal([2+i*2, 4+i*2], h.get_range(i))
|
51
|
+
assert_equal([2 + i * 2, 4 + i * 2], h.get_range(i))
|
52
|
+
}
|
53
|
+
end
|
54
|
+
should 'min() and max()' do
|
55
|
+
h = Statsample::Histogram.alloc_uniform(5, 2, 12)
|
56
|
+
assert_equal(2, h.min)
|
57
|
+
assert_equal(12, h.max)
|
58
|
+
end
|
59
|
+
should 'max_val()' do
|
60
|
+
h = Statsample::Histogram.alloc(5, [0, 5])
|
61
|
+
100.times { h.increment(rand * 5) }
|
62
|
+
max = h.bin[0]
|
63
|
+
(1..4).each {|i|
|
64
|
+
max = h.bin[i] if h.bin[i] > max
|
53
65
|
}
|
66
|
+
assert_equal(max, h.max_val)
|
54
67
|
end
|
55
|
-
should
|
56
|
-
h=Statsample::Histogram.
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
assert_equal(max,h.max_val)
|
68
|
-
end
|
69
|
-
should "min_val()" do
|
70
|
-
h = Statsample::Histogram.alloc(5, [0, 5])
|
71
|
-
100.times {h.increment(rand*5)}
|
72
|
-
min=h.bin[0]
|
73
|
-
(1..4).each {|i|
|
74
|
-
min = h.bin[i] if h.bin[i]<min
|
75
|
-
}
|
76
|
-
assert_equal(min,h.min_val)
|
77
|
-
end
|
78
|
-
should "return correct estimated mean" do
|
79
|
-
a=[1.5,1.5,1.5,3.5,3.5,3.5].to_scale
|
80
|
-
h=Statsample::Histogram.alloc(5,[0,5])
|
68
|
+
should 'min_val()' do
|
69
|
+
h = Statsample::Histogram.alloc(5, [0, 5])
|
70
|
+
100.times { h.increment(rand * 5) }
|
71
|
+
min = h.bin[0]
|
72
|
+
(1..4).each {|i|
|
73
|
+
min = h.bin[i] if h.bin[i] < min
|
74
|
+
}
|
75
|
+
assert_equal(min, h.min_val)
|
76
|
+
end
|
77
|
+
should 'return correct estimated mean' do
|
78
|
+
a = [1.5, 1.5, 1.5, 3.5, 3.5, 3.5].to_scale
|
79
|
+
h = Statsample::Histogram.alloc(5, [0, 5])
|
81
80
|
h.increment(a)
|
82
81
|
assert_equal(2.5, h.estimated_mean)
|
83
82
|
end
|
84
|
-
should
|
85
|
-
a=[0.5,1.5,1.5,1.5,2.5, 3.5,3.5,3.5,4.5].to_scale
|
86
|
-
h=Statsample::Histogram.alloc(5,[0,5])
|
83
|
+
should 'return correct estimated standard deviation' do
|
84
|
+
a = [0.5, 1.5, 1.5, 1.5, 2.5, 3.5, 3.5, 3.5, 4.5].to_scale
|
85
|
+
h = Statsample::Histogram.alloc(5, [0, 5])
|
87
86
|
h.increment(a)
|
88
87
|
assert_equal(a.sd, h.estimated_standard_deviation)
|
89
88
|
end
|
90
|
-
should
|
91
|
-
h=Statsample::Histogram.alloc(5,[0,5])
|
92
|
-
n=rand(100)
|
93
|
-
n.times { h.increment(1)}
|
89
|
+
should 'return correct sum for all values' do
|
90
|
+
h = Statsample::Histogram.alloc(5, [0, 5])
|
91
|
+
n = rand(100)
|
92
|
+
n.times { h.increment(1) }
|
94
93
|
assert_equal(n, h.sum)
|
95
94
|
end
|
96
|
-
should
|
97
|
-
h=Statsample::Histogram.alloc(5,[0,5])
|
98
|
-
h.increment([0.5,2.5,4.5])
|
99
|
-
assert_equal(1,h.sum(0,1))
|
100
|
-
assert_equal(2,h.sum(1,4))
|
101
|
-
|
95
|
+
should 'return correct sum for a subset of values' do
|
96
|
+
h = Statsample::Histogram.alloc(5, [0, 5])
|
97
|
+
h.increment([0.5, 2.5, 4.5])
|
98
|
+
assert_equal(1, h.sum(0, 1))
|
99
|
+
assert_equal(2, h.sum(1, 4))
|
102
100
|
end
|
103
|
-
should
|
101
|
+
should 'not raise exception when all values equal' do
|
104
102
|
assert_nothing_raised do
|
105
|
-
a = [5,5,5,5,5,5].to_scale
|
106
|
-
h=Statsample::Graph::Histogram.new(a)
|
103
|
+
a = [5, 5, 5, 5, 5, 5].to_scale
|
104
|
+
h = Statsample::Graph::Histogram.new(a)
|
107
105
|
h.to_svg
|
108
106
|
end
|
109
107
|
end
|
110
|
-
|
111
108
|
end
|
112
109
|
end
|
data/test/test_matrix.rb
CHANGED
@@ -1,50 +1,48 @@
|
|
1
|
-
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
|
1
|
+
require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
2
2
|
|
3
|
-
class StatsampleMatrixTestCase <
|
4
|
-
|
3
|
+
class StatsampleMatrixTestCase < Minitest::Test
|
5
4
|
def test_to_dataset
|
6
|
-
m=Matrix[[1,4],[2,5],[3,6]]
|
5
|
+
m = Matrix[[1, 4], [2, 5], [3, 6]]
|
7
6
|
m.extend Statsample::NamedMatrix
|
8
|
-
m.fields_y
|
9
|
-
m.name=
|
10
|
-
samples=100
|
11
|
-
x1=[1,2,3].to_scale
|
12
|
-
x2=[4,5,6].to_scale
|
13
|
-
ds={'x1'=>x1,'x2'=>x2}.to_dataset
|
14
|
-
ds.name=
|
15
|
-
obs=m.to_dataset
|
16
|
-
assert_equal(ds['x1'],obs['x1'])
|
17
|
-
assert_equal(ds['x2'],obs['x2'])
|
18
|
-
assert_equal(ds['x1'].mean,obs['x1'].mean)
|
19
|
-
|
20
|
-
|
7
|
+
m.fields_y = %w(x1 x2)
|
8
|
+
m.name = 'test'
|
9
|
+
samples = 100
|
10
|
+
x1 = [1, 2, 3].to_scale
|
11
|
+
x2 = [4, 5, 6].to_scale
|
12
|
+
ds = { 'x1' => x1, 'x2' => x2 }.to_dataset
|
13
|
+
ds.name = 'test'
|
14
|
+
obs = m.to_dataset
|
15
|
+
assert_equal(ds['x1'], obs['x1'])
|
16
|
+
assert_equal(ds['x2'], obs['x2'])
|
17
|
+
assert_equal(ds['x1'].mean, obs['x1'].mean)
|
21
18
|
end
|
19
|
+
|
22
20
|
def test_covariate
|
23
|
-
a=Matrix[[1.0, 0.3, 0.2], [0.3, 1.0, 0.5], [0.2, 0.5, 1.0]]
|
21
|
+
a = Matrix[[1.0, 0.3, 0.2], [0.3, 1.0, 0.5], [0.2, 0.5, 1.0]]
|
24
22
|
a.extend Statsample::CovariateMatrix
|
25
|
-
a.fields
|
23
|
+
a.fields = %w(a b c)
|
26
24
|
assert_equal(:correlation, a._type)
|
27
25
|
|
28
|
-
assert_equal(Matrix[[0.5],[0.3]], a.submatrix(%w
|
29
|
-
assert_equal(Matrix[[1.0, 0.2]
|
30
|
-
assert_equal(:correlation, a.submatrix(%w
|
26
|
+
assert_equal(Matrix[[0.5], [0.3]], a.submatrix(%w(c a), %w(b)))
|
27
|
+
assert_equal(Matrix[[1.0, 0.2], [0.2, 1.0]], a.submatrix(%w(c a)))
|
28
|
+
assert_equal(:correlation, a.submatrix(%w(c a))._type)
|
31
29
|
|
32
|
-
a=Matrix[[20,30,10], [30,60,50], [10,50,50]]
|
30
|
+
a = Matrix[[20, 30, 10], [30, 60, 50], [10, 50, 50]]
|
33
31
|
|
34
32
|
a.extend Statsample::CovariateMatrix
|
35
33
|
|
36
34
|
assert_equal(:covariance, a._type)
|
37
35
|
|
38
|
-
a=50.times.collect {rand
|
39
|
-
b=50.times.collect {rand
|
40
|
-
c=50.times.collect {rand
|
41
|
-
ds={'a'=>a,'b'=>b,'c'=>c}.to_dataset
|
42
|
-
corr=Statsample::Bivariate.correlation_matrix(ds)
|
43
|
-
real=Statsample::Bivariate.covariance_matrix(ds).correlation
|
36
|
+
a = 50.times.collect { rand }.to_scale
|
37
|
+
b = 50.times.collect { rand }.to_scale
|
38
|
+
c = 50.times.collect { rand }.to_scale
|
39
|
+
ds = { 'a' => a, 'b' => b, 'c' => c }.to_dataset
|
40
|
+
corr = Statsample::Bivariate.correlation_matrix(ds)
|
41
|
+
real = Statsample::Bivariate.covariance_matrix(ds).correlation
|
44
42
|
corr.row_size.times do |i|
|
45
43
|
corr.column_size.times do |j|
|
46
|
-
assert_in_delta(corr[i,j], real[i,j],1e-15)
|
44
|
+
assert_in_delta(corr[i, j], real[i, j], 1e-15)
|
47
45
|
end
|
48
46
|
end
|
49
|
-
end
|
47
|
+
end
|
50
48
|
end
|
data/test/test_multiset.rb
CHANGED
@@ -1,158 +1,164 @@
|
|
1
|
-
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
|
1
|
+
require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
2
2
|
|
3
|
-
|
4
|
-
class StatsampleMultisetTestCase < MiniTest::Unit::TestCase
|
3
|
+
class StatsampleMultisetTestCase < Minitest::Test
|
5
4
|
def setup
|
6
|
-
@x
|
7
|
-
@y=[1,2,3,4,5,6,7,8].to_scale
|
8
|
-
@z=[10,11,12,13,14,15,16,17].to_scale
|
9
|
-
@ds={'x'
|
10
|
-
@ms
|
5
|
+
@x = %w(a a a a b b b b).to_vector
|
6
|
+
@y = [1, 2, 3, 4, 5, 6, 7, 8].to_scale
|
7
|
+
@z = [10, 11, 12, 13, 14, 15, 16, 17].to_scale
|
8
|
+
@ds = { 'x' => @x, 'y' => @y, 'z' => @z }.to_dataset
|
9
|
+
@ms = @ds.to_multiset_by_split('x')
|
11
10
|
end
|
11
|
+
|
12
12
|
def test_creation
|
13
|
-
v1a=[1,2,3,4,5].to_vector
|
14
|
-
v2b=[11,21,31,41,51].to_vector
|
15
|
-
v3c=[21,23,34,45,56].to_vector
|
16
|
-
ds1={'v1'=>v1a,'v2'=>v2b,'v3'=>v3c}.to_dataset
|
17
|
-
v1b=[15,25,35,45,55].to_vector
|
18
|
-
v2b=[11,21,31,41,51].to_vector
|
19
|
-
v3b=[21,23,34,45,56].to_vector
|
20
|
-
ds2={'v1'=>v1b,'v2'=>v2b,'v3'=>v3b}.to_dataset
|
21
|
-
ms=Statsample::Multiset.new(
|
22
|
-
ms.add_dataset('ds1',ds1)
|
23
|
-
ms.add_dataset('ds2',ds2)
|
24
|
-
assert_equal(ds1,ms['ds1'])
|
25
|
-
assert_equal(ds2,ms['ds2'])
|
26
|
-
assert_equal(v1a,ms['ds1']['v1'])
|
27
|
-
assert_not_equal(v1b,ms['ds1']['v1'])
|
28
|
-
ds3={'v1'=>v1b,'v2'=>v2b}.to_dataset
|
13
|
+
v1a = [1, 2, 3, 4, 5].to_vector
|
14
|
+
v2b = [11, 21, 31, 41, 51].to_vector
|
15
|
+
v3c = [21, 23, 34, 45, 56].to_vector
|
16
|
+
ds1 = { 'v1' => v1a, 'v2' => v2b, 'v3' => v3c }.to_dataset
|
17
|
+
v1b = [15, 25, 35, 45, 55].to_vector
|
18
|
+
v2b = [11, 21, 31, 41, 51].to_vector
|
19
|
+
v3b = [21, 23, 34, 45, 56].to_vector
|
20
|
+
ds2 = { 'v1' => v1b, 'v2' => v2b, 'v3' => v3b }.to_dataset
|
21
|
+
ms = Statsample::Multiset.new(%w(v1 v2 v3))
|
22
|
+
ms.add_dataset('ds1', ds1)
|
23
|
+
ms.add_dataset('ds2', ds2)
|
24
|
+
assert_equal(ds1, ms['ds1'])
|
25
|
+
assert_equal(ds2, ms['ds2'])
|
26
|
+
assert_equal(v1a, ms['ds1']['v1'])
|
27
|
+
assert_not_equal(v1b, ms['ds1']['v1'])
|
28
|
+
ds3 = { 'v1' => v1b, 'v2' => v2b }.to_dataset
|
29
29
|
assert_raise ArgumentError do
|
30
30
|
ms.add_dataset(ds3)
|
31
31
|
end
|
32
32
|
end
|
33
|
+
|
33
34
|
def test_creation_empty
|
34
|
-
ms=Statsample::Multiset.new_empty_vectors(%w
|
35
|
-
ds_male={'id'=>[].to_vector,'age'=>[].to_vector, 'name'=>[].to_vector}.to_dataset(%w
|
36
|
-
ds_female={'id'=>[].to_vector,'age'=>[].to_vector, 'name'=>[].to_vector}.to_dataset(%w
|
37
|
-
ms2=Statsample::Multiset.new(%w
|
38
|
-
ms2.add_dataset('male',ds_male)
|
39
|
-
ms2.add_dataset('female',ds_female)
|
40
|
-
assert_equal(ms2.fields,ms.fields)
|
41
|
-
assert_equal(ms2['male'],ms['male'])
|
42
|
-
assert_equal(ms2['female'],ms['female'])
|
35
|
+
ms = Statsample::Multiset.new_empty_vectors(%w(id age name), %w(male female))
|
36
|
+
ds_male = { 'id' => [].to_vector, 'age' => [].to_vector, 'name' => [].to_vector }.to_dataset(%w(id age name))
|
37
|
+
ds_female = { 'id' => [].to_vector, 'age' => [].to_vector, 'name' => [].to_vector }.to_dataset(%w(id age name))
|
38
|
+
ms2 = Statsample::Multiset.new(%w(id age name))
|
39
|
+
ms2.add_dataset('male', ds_male)
|
40
|
+
ms2.add_dataset('female', ds_female)
|
41
|
+
assert_equal(ms2.fields, ms.fields)
|
42
|
+
assert_equal(ms2['male'], ms['male'])
|
43
|
+
assert_equal(ms2['female'], ms['female'])
|
43
44
|
end
|
45
|
+
|
44
46
|
def test_to_multiset_by_split_one
|
45
|
-
sex
|
46
|
-
city
|
47
|
-
age=[10,10,20,30,34,34,33,35,36,40].to_vector(:scale)
|
48
|
-
ds={'sex'=>sex,'city'=>city,'age'=>age}.to_dataset
|
49
|
-
ms=ds.to_multiset_by_split('sex')
|
50
|
-
assert_equal(2,ms.n_datasets)
|
51
|
-
assert_equal(%w
|
52
|
-
assert_equal(6,ms['m'].cases)
|
53
|
-
assert_equal(4,ms['f'].cases)
|
54
|
-
assert_equal(%w
|
55
|
-
assert_equal([34,33,35,36],ms['f']['age'].to_a)
|
47
|
+
sex = %w(m m m m m f f f f m).to_vector(:nominal)
|
48
|
+
city = %w(London Paris NY London Paris NY London Paris NY Tome).to_vector(:nominal)
|
49
|
+
age = [10, 10, 20, 30, 34, 34, 33, 35, 36, 40].to_vector(:scale)
|
50
|
+
ds = { 'sex' => sex, 'city' => city, 'age' => age }.to_dataset
|
51
|
+
ms = ds.to_multiset_by_split('sex')
|
52
|
+
assert_equal(2, ms.n_datasets)
|
53
|
+
assert_equal(%w(f m), ms.datasets.keys.sort)
|
54
|
+
assert_equal(6, ms['m'].cases)
|
55
|
+
assert_equal(4, ms['f'].cases)
|
56
|
+
assert_equal(%w(London Paris NY London Paris Tome), ms['m']['city'].to_a)
|
57
|
+
assert_equal([34, 33, 35, 36], ms['f']['age'].to_a)
|
56
58
|
end
|
59
|
+
|
57
60
|
def test_to_multiset_by_split_multiple
|
58
|
-
sex
|
59
|
-
city
|
60
|
-
hair
|
61
|
-
age=[10,10,20,30,34,34,33,35,36,40, 10,10,20,30,34,34,33,35,36,40].to_vector(:scale)
|
62
|
-
ds={'sex'=>sex,'city'=>city,'hair'=>hair,'age'=>age}.to_dataset(%w
|
63
|
-
ms=ds.to_multiset_by_split('sex','city','hair')
|
64
|
-
assert_equal(8,ms.n_datasets)
|
65
|
-
assert_equal(3,ms[%w
|
66
|
-
assert_equal(3,ms[%w
|
67
|
-
assert_equal(1,ms[%w
|
61
|
+
sex = %w(m m m m m m m m m m f f f f f f f f f f).to_vector(:nominal)
|
62
|
+
city = %w(London London London Paris Paris London London London Paris Paris London London London Paris Paris London London London Paris Paris).to_vector(:nominal)
|
63
|
+
hair = %w(blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black).to_vector(:nominal)
|
64
|
+
age = [10, 10, 20, 30, 34, 34, 33, 35, 36, 40, 10, 10, 20, 30, 34, 34, 33, 35, 36, 40].to_vector(:scale)
|
65
|
+
ds = { 'sex' => sex, 'city' => city, 'hair' => hair, 'age' => age }.to_dataset(%w(sex city hair age))
|
66
|
+
ms = ds.to_multiset_by_split('sex', 'city', 'hair')
|
67
|
+
assert_equal(8, ms.n_datasets)
|
68
|
+
assert_equal(3, ms[%w(m London blonde)].cases)
|
69
|
+
assert_equal(3, ms[%w(m London blonde)].cases)
|
70
|
+
assert_equal(1, ms[%w(m Paris black)].cases)
|
68
71
|
end
|
69
72
|
|
70
73
|
def test_stratum_proportion
|
71
|
-
ds1={'q1'=>[1,1,1,1,1,0,0,0,0,0,0,0].to_vector}.to_dataset
|
72
|
-
ds2={'q1'=>[1,1,1,1,1,1,1,0,0].to_vector}.to_dataset
|
73
|
-
assert_equal(5.0/12, ds1['q1'].proportion
|
74
|
-
assert_equal(7.0/9, ds2['q1'].proportion
|
75
|
-
ms=Statsample::Multiset.new(['q1'])
|
76
|
-
ms.add_dataset('d1',ds1)
|
77
|
-
ms.add_dataset('d2',ds2)
|
78
|
-
ss=Statsample::StratifiedSample.new(ms,
|
79
|
-
assert_in_delta(0.655, ss.proportion('q1'),0.01)
|
80
|
-
assert_in_delta(0.345, ss.proportion('q1',0),0.01)
|
81
|
-
|
74
|
+
ds1 = { 'q1' => [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0].to_vector }.to_dataset
|
75
|
+
ds2 = { 'q1' => [1, 1, 1, 1, 1, 1, 1, 0, 0].to_vector }.to_dataset
|
76
|
+
assert_equal(5.0 / 12, ds1['q1'].proportion)
|
77
|
+
assert_equal(7.0 / 9, ds2['q1'].proportion)
|
78
|
+
ms = Statsample::Multiset.new(['q1'])
|
79
|
+
ms.add_dataset('d1', ds1)
|
80
|
+
ms.add_dataset('d2', ds2)
|
81
|
+
ss = Statsample::StratifiedSample.new(ms, 'd1' => 50, 'd2' => 100)
|
82
|
+
assert_in_delta(0.655, ss.proportion('q1'), 0.01)
|
83
|
+
assert_in_delta(0.345, ss.proportion('q1', 0), 0.01)
|
82
84
|
end
|
85
|
+
|
83
86
|
def test_stratum_scale
|
84
|
-
boys={'test'=>[50, 55, 60, 62, 62, 65, 67, 67, 70, 70, 73, 73, 75, 78, 78, 80, 85, 90].to_vector(:scale)}.to_dataset
|
85
|
-
girls={'test'=>[70, 70, 72, 72, 75, 75, 78, 78, 80, 80, 82, 82, 85, 85, 88, 88, 90, 90].to_vector(:scale)}.to_dataset
|
86
|
-
ms=Statsample::Multiset.new(['test'])
|
87
|
-
ms.add_dataset('boys',boys)
|
88
|
-
ms.add_dataset('girls',girls)
|
89
|
-
ss=Statsample::StratifiedSample.new(ms,
|
90
|
-
assert_equal(2,ss.strata_number)
|
91
|
-
assert_equal(
|
92
|
-
assert_equal(
|
93
|
-
assert_equal(
|
94
|
-
assert_equal(36,ss.sample_size)
|
95
|
-
assert_equal(75,ss.mean('test'))
|
96
|
-
assert_in_delta(1.45,ss.standard_error_wor('test'),0.01)
|
97
|
-
assert_in_delta(ss.standard_error_wor('test'), ss.standard_error_wor_2('test'),0.00001)
|
87
|
+
boys = { 'test' => [50, 55, 60, 62, 62, 65, 67, 67, 70, 70, 73, 73, 75, 78, 78, 80, 85, 90].to_vector(:scale) }.to_dataset
|
88
|
+
girls = { 'test' => [70, 70, 72, 72, 75, 75, 78, 78, 80, 80, 82, 82, 85, 85, 88, 88, 90, 90].to_vector(:scale) }.to_dataset
|
89
|
+
ms = Statsample::Multiset.new(['test'])
|
90
|
+
ms.add_dataset('boys', boys)
|
91
|
+
ms.add_dataset('girls', girls)
|
92
|
+
ss = Statsample::StratifiedSample.new(ms, 'boys' => 10_000, 'girls' => 10_000)
|
93
|
+
assert_equal(2, ss.strata_number)
|
94
|
+
assert_equal(20_000, ss.population_size)
|
95
|
+
assert_equal(10_000, ss.stratum_size('boys'))
|
96
|
+
assert_equal(10_000, ss.stratum_size('girls'))
|
97
|
+
assert_equal(36, ss.sample_size)
|
98
|
+
assert_equal(75, ss.mean('test'))
|
99
|
+
assert_in_delta(1.45, ss.standard_error_wor('test'), 0.01)
|
100
|
+
assert_in_delta(ss.standard_error_wor('test'), ss.standard_error_wor_2('test'), 0.00001)
|
98
101
|
end
|
102
|
+
|
99
103
|
def test_each
|
100
|
-
xpe={
|
101
|
-
'a'
|
102
|
-
'b'
|
104
|
+
xpe = {
|
105
|
+
'a' => %w(a a a a).to_vector,
|
106
|
+
'b' => %w(b b b b).to_vector
|
103
107
|
}
|
104
|
-
ype={
|
105
|
-
'a'=>[1,2,3,4].to_scale,
|
106
|
-
'b'=>[5,6,7,8].to_scale
|
108
|
+
ype = {
|
109
|
+
'a' => [1, 2, 3, 4].to_scale,
|
110
|
+
'b' => [5, 6, 7, 8].to_scale
|
107
111
|
}
|
108
|
-
zpe={
|
109
|
-
'a'=>[10,11,12,13].to_scale,
|
110
|
-
'b'=>[14,15,16,17].to_scale
|
112
|
+
zpe = {
|
113
|
+
'a' => [10, 11, 12, 13].to_scale,
|
114
|
+
'b' => [14, 15, 16, 17].to_scale
|
111
115
|
}
|
112
|
-
xp,yp,zp=
|
113
|
-
@ms.each {|k,ds|
|
114
|
-
xp[k]=ds['x']
|
115
|
-
yp[k]=ds['y']
|
116
|
-
zp[k]=ds['z']
|
116
|
+
xp, yp, zp = {}, {}, {}
|
117
|
+
@ms.each {|k, ds|
|
118
|
+
xp[k] = ds['x']
|
119
|
+
yp[k] = ds['y']
|
120
|
+
zp[k] = ds['z']
|
117
121
|
}
|
118
|
-
assert_equal(xpe,xp)
|
119
|
-
assert_equal(ype,yp)
|
120
|
-
assert_equal(zpe,zp)
|
121
|
-
|
122
|
+
assert_equal(xpe, xp)
|
123
|
+
assert_equal(ype, yp)
|
124
|
+
assert_equal(zpe, zp)
|
122
125
|
end
|
126
|
+
|
123
127
|
def test_multiset_union_with_block
|
124
|
-
|
125
|
-
|
126
|
-
r2
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
ds['z'].recode!{|v|
|
135
|
-
|
128
|
+
r1 = rand
|
129
|
+
r2 = rand
|
130
|
+
ye = [1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2].to_scale
|
131
|
+
|
132
|
+
ze = [10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2].to_scale
|
133
|
+
|
134
|
+
ds2 = @ms.union {|k, ds|
|
135
|
+
ds['y'].recode!{|v|
|
136
|
+
k == 'a' ? v * r1 : v * r2
|
137
|
+
}
|
138
|
+
ds['z'].recode!{|v|
|
139
|
+
k == 'a' ? v * r1 : v * r2
|
140
|
+
}
|
136
141
|
}
|
137
|
-
assert_equal(ye,ds2['y'])
|
138
|
-
assert_equal(ze,ds2['z'])
|
142
|
+
assert_equal(ye, ds2['y'])
|
143
|
+
assert_equal(ze, ds2['z'])
|
139
144
|
end
|
145
|
+
|
140
146
|
def test_multiset_union
|
141
|
-
r1=rand
|
142
|
-
r2=rand
|
143
|
-
ye=[1*r1,2*r1,3*r1,4*r1,5*r2,6*r2,7*r2,8*r2].to_scale
|
144
|
-
|
145
|
-
ze=[10*r1,11*r1,12*r1,13*r1, 14*r2,15*r2,16*r2,17*r2].to_scale
|
146
|
-
@ms.each {|k,ds|
|
147
|
-
ds['y'].recode!{|v|
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
147
|
+
r1 = rand
|
148
|
+
r2 = rand
|
149
|
+
ye = [1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2].to_scale
|
150
|
+
|
151
|
+
ze = [10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2].to_scale
|
152
|
+
@ms.each {|k, ds|
|
153
|
+
ds['y'].recode!{|v|
|
154
|
+
k == 'a' ? v * r1 : v * r2
|
155
|
+
}
|
156
|
+
ds['z'].recode!{|v|
|
157
|
+
k == 'a' ? v * r1 : v * r2
|
158
|
+
}
|
152
159
|
}
|
153
|
-
ds2
|
154
|
-
assert_equal(ye,ds2['y'])
|
155
|
-
assert_equal(ze,ds2['z'])
|
156
|
-
|
160
|
+
ds2 = @ms.union
|
161
|
+
assert_equal(ye, ds2['y'])
|
162
|
+
assert_equal(ze, ds2['z'])
|
157
163
|
end
|
158
164
|
end
|