statsample 1.4.3 → 1.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/History.txt +8 -0
- data/benchmarks/correlation_matrix_15_variables.rb +1 -1
- data/benchmarks/correlation_matrix_5_variables.rb +1 -1
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +2 -2
- data/examples/dataset.rb +2 -2
- data/examples/icc.rb +1 -1
- data/examples/levene.rb +2 -2
- data/examples/parallel_analysis.rb +1 -1
- data/examples/u_test.rb +2 -2
- data/examples/vector.rb +1 -1
- data/examples/velicer_map_test.rb +1 -1
- data/lib/statsample.rb +30 -4
- data/lib/statsample/anova/oneway.rb +3 -3
- data/lib/statsample/anova/twoway.rb +3 -3
- data/lib/statsample/bivariate.rb +7 -7
- data/lib/statsample/bivariate/pearson.rb +2 -2
- data/lib/statsample/converter/csv.rb +1 -1
- data/lib/statsample/converters.rb +7 -7
- data/lib/statsample/dataset.rb +8 -8
- data/lib/statsample/dominanceanalysis.rb +4 -4
- data/lib/statsample/dominanceanalysis/bootstrap.rb +8 -8
- data/lib/statsample/factor.rb +2 -4
- data/lib/statsample/factor/map.rb +2 -1
- data/lib/statsample/factor/parallelanalysis.rb +2 -2
- data/lib/statsample/factor/pca.rb +2 -2
- data/lib/statsample/factor/principalaxis.rb +2 -2
- data/lib/statsample/graph/boxplot.rb +4 -4
- data/lib/statsample/graph/histogram.rb +2 -2
- data/lib/statsample/graph/scatterplot.rb +4 -4
- data/lib/statsample/matrix.rb +20 -6
- data/lib/statsample/regression.rb +2 -2
- data/lib/statsample/regression/multiple.rb +3 -3
- data/lib/statsample/regression/multiple/alglibengine.rb +5 -5
- data/lib/statsample/regression/multiple/baseengine.rb +3 -3
- data/lib/statsample/regression/multiple/gslengine.rb +5 -5
- data/lib/statsample/regression/multiple/rubyengine.rb +4 -4
- data/lib/statsample/reliability/icc.rb +1 -1
- data/lib/statsample/reliability/multiscaleanalysis.rb +4 -4
- data/lib/statsample/reliability/scaleanalysis.rb +6 -6
- data/lib/statsample/reliability/skillscaleanalysis.rb +1 -1
- data/lib/statsample/resample.rb +1 -1
- data/lib/statsample/shorthand.rb +1 -1
- data/lib/statsample/test/bartlettsphericity.rb +1 -1
- data/lib/statsample/test/levene.rb +4 -4
- data/lib/statsample/test/t.rb +3 -3
- data/lib/statsample/test/umannwhitney.rb +2 -2
- data/lib/statsample/vector.rb +103 -80
- data/lib/statsample/vector/gsl.rb +16 -16
- data/lib/statsample/version.rb +1 -1
- data/test/test_analysis.rb +1 -1
- data/test/test_anova_contrast.rb +4 -4
- data/test/test_anovatwowaywithdataset.rb +1 -1
- data/test/test_anovawithvectors.rb +6 -6
- data/test/test_awesome_print_bug.rb +1 -1
- data/test/test_bartlettsphericity.rb +3 -3
- data/test/test_bivariate.rb +38 -38
- data/test/test_crosstab.rb +2 -2
- data/test/test_csv.rb +6 -6
- data/test/test_dataset.rb +79 -79
- data/test/test_factor.rb +55 -49
- data/test/test_factor_pa.rb +4 -4
- data/test/test_ggobi.rb +3 -3
- data/test/test_gsl.rb +3 -3
- data/test/test_histogram.rb +3 -3
- data/test/test_matrix.rb +5 -5
- data/test/test_multiset.rb +19 -19
- data/test/test_regression.rb +27 -27
- data/test/test_reliability.rb +14 -14
- data/test/test_reliability_icc.rb +7 -7
- data/test/test_reliability_skillscale.rb +6 -6
- data/test/test_resample.rb +1 -1
- data/test/test_rserve_extension.rb +4 -4
- data/test/test_statistics.rb +5 -5
- data/test/test_stest.rb +8 -8
- data/test/test_stratified.rb +3 -3
- data/test/test_test_t.rb +5 -5
- data/test/test_umannwhitney.rb +2 -2
- data/test/test_vector.rb +153 -119
- data/test/test_wilcoxonsignedrank.rb +4 -4
- data/test/test_xls.rb +6 -6
- metadata +3 -53
@@ -18,7 +18,7 @@ module Statsample
|
|
18
18
|
end
|
19
19
|
|
20
20
|
def gsl
|
21
|
-
@gsl||=GSL::Vector.alloc(@
|
21
|
+
@gsl||=GSL::Vector.alloc(@numeric_data) if @numeric_data.size>0
|
22
22
|
end
|
23
23
|
|
24
24
|
alias :to_gsl :gsl
|
@@ -26,7 +26,7 @@ module Statsample
|
|
26
26
|
if flawed?
|
27
27
|
vector_standarized_compute_ruby(m,sd)
|
28
28
|
else
|
29
|
-
gsl.collect {|x| (x.to_f - m).quo(sd)}.
|
29
|
+
gsl.collect {|x| (x.to_f - m).quo(sd)}.to_numeric
|
30
30
|
end
|
31
31
|
end
|
32
32
|
|
@@ -34,20 +34,20 @@ module Statsample
|
|
34
34
|
if flawed?
|
35
35
|
vector_centered_compute_ruby(m)
|
36
36
|
else
|
37
|
-
gsl.collect {|x| (x.to_f - m)}.
|
37
|
+
gsl.collect {|x| (x.to_f - m)}.to_numeric
|
38
38
|
end
|
39
39
|
end
|
40
40
|
def sample_with_replacement(sample=1)
|
41
|
-
if(@type!=:
|
41
|
+
if(@type!=:numeric)
|
42
42
|
sample_with_replacement_ruby(sample)
|
43
43
|
else
|
44
44
|
r = GSL::Rng.alloc(GSL::Rng::MT19937,rand(10000))
|
45
|
-
Statsample::Vector.new(r.sample(gsl, sample).to_a,:
|
45
|
+
Statsample::Vector.new(r.sample(gsl, sample).to_a,:numeric)
|
46
46
|
end
|
47
47
|
end
|
48
48
|
|
49
49
|
def sample_without_replacement(sample=1)
|
50
|
-
if(@type!=:
|
50
|
+
if(@type!=:numeric)
|
51
51
|
sample_without_replacement_ruby(sample)
|
52
52
|
else
|
53
53
|
r = GSL::Rng.alloc(GSL::Rng::MT19937,rand(10000))
|
@@ -55,50 +55,50 @@ module Statsample
|
|
55
55
|
end
|
56
56
|
end
|
57
57
|
def median
|
58
|
-
if @type!=:
|
58
|
+
if @type!=:numeric
|
59
59
|
median_ruby
|
60
60
|
else
|
61
|
-
sorted=GSL::Vector.alloc(@
|
61
|
+
sorted=GSL::Vector.alloc(@numeric_data.sort)
|
62
62
|
GSL::Stats::median_from_sorted_data(sorted)
|
63
63
|
end
|
64
64
|
end
|
65
65
|
|
66
66
|
def sum
|
67
|
-
check_type :
|
67
|
+
check_type :numeric
|
68
68
|
gsl.nil? ? nil : gsl.sum
|
69
69
|
end
|
70
70
|
def mean
|
71
|
-
check_type :
|
71
|
+
check_type :numeric
|
72
72
|
gsl.nil? ? nil : gsl.mean
|
73
73
|
end
|
74
74
|
def variance_sample(m=nil)
|
75
|
-
check_type :
|
75
|
+
check_type :numeric
|
76
76
|
m||=mean
|
77
77
|
gsl.nil? ? nil : gsl.variance_m
|
78
78
|
end
|
79
79
|
|
80
80
|
def standard_deviation_sample(m=nil)
|
81
|
-
check_type :
|
81
|
+
check_type :numeric
|
82
82
|
m||=mean
|
83
83
|
gsl.nil? ? nil : gsl.sd(m)
|
84
84
|
end
|
85
85
|
|
86
86
|
def variance_population(m=nil) # :nodoc:
|
87
|
-
check_type :
|
87
|
+
check_type :numeric
|
88
88
|
m||=mean
|
89
89
|
gsl.nil? ? nil : gsl.variance_with_fixed_mean(m)
|
90
90
|
end
|
91
91
|
def standard_deviation_population(m=nil) # :nodoc:
|
92
|
-
check_type :
|
92
|
+
check_type :numeric
|
93
93
|
m||=mean
|
94
94
|
gsl.nil? ? nil : gsl.sd_with_fixed_mean(m)
|
95
95
|
end
|
96
96
|
def skew # :nodoc:
|
97
|
-
check_type :
|
97
|
+
check_type :numeric
|
98
98
|
gsl.nil? ? nil : gsl.skew
|
99
99
|
end
|
100
100
|
def kurtosis # :nodoc:
|
101
|
-
check_type :
|
101
|
+
check_type :numeric
|
102
102
|
gsl.nil? ? nil : gsl.kurtosis
|
103
103
|
end
|
104
104
|
end
|
data/lib/statsample/version.rb
CHANGED
data/test/test_analysis.rb
CHANGED
@@ -39,7 +39,7 @@ class StatsampleAnalysisTestCase < Minitest::Test
|
|
39
39
|
should 'to_text returns the same as a normal ReportBuilder object' do
|
40
40
|
rb = ReportBuilder.new(name: :test)
|
41
41
|
section = ReportBuilder::Section.new(name: 'first')
|
42
|
-
a = [1, 2, 3].
|
42
|
+
a = [1, 2, 3].to_numeric
|
43
43
|
section.add('first')
|
44
44
|
section.add(a)
|
45
45
|
rb.add(section)
|
data/test/test_anova_contrast.rb
CHANGED
@@ -2,10 +2,10 @@ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
|
2
2
|
class StatsampleAnovaContrastTestCase < Minitest::Test
|
3
3
|
context(Statsample::Anova::Contrast) do
|
4
4
|
setup do
|
5
|
-
constant = [12, 13, 11, 12, 12].
|
6
|
-
frequent = [9, 10, 9, 13, 14].
|
7
|
-
infrequent = [15, 16, 17, 16, 16].
|
8
|
-
never = [17, 18, 12, 18, 20].
|
5
|
+
constant = [12, 13, 11, 12, 12].to_numeric
|
6
|
+
frequent = [9, 10, 9, 13, 14].to_numeric
|
7
|
+
infrequent = [15, 16, 17, 16, 16].to_numeric
|
8
|
+
never = [17, 18, 12, 18, 20].to_numeric
|
9
9
|
@vectors = [constant, frequent, infrequent, never]
|
10
10
|
@c = Statsample::Anova::Contrast.new(vectors: @vectors)
|
11
11
|
end
|
@@ -4,7 +4,7 @@ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
|
4
4
|
class StatsampleAnovaTwoWayWithVectorsTestCase < Minitest::Test
|
5
5
|
context(Statsample::Anova::TwoWayWithVectors) do
|
6
6
|
setup do
|
7
|
-
@pa = [5, 4, 3, 4, 2, 18, 19, 14, 12, 15, 6, 7, 5, 8, 4, 6, 9, 5, 9, 3].
|
7
|
+
@pa = [5, 4, 3, 4, 2, 18, 19, 14, 12, 15, 6, 7, 5, 8, 4, 6, 9, 5, 9, 3].to_numeric
|
8
8
|
@pa.name = 'Passive Avoidance'
|
9
9
|
@a = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1].to_vector
|
10
10
|
@a.labels = { 0 => '0%', 1 => '35%' }
|
@@ -3,9 +3,9 @@ class StatsampleAnovaOneWayWithVectorsTestCase < Minitest::Test
|
|
3
3
|
context(Statsample::Anova::OneWayWithVectors) do
|
4
4
|
context('when initializing') do
|
5
5
|
setup do
|
6
|
-
@v1 = 10.times.map { rand(100) }.
|
7
|
-
@v2 = 10.times.map { rand(100) }.
|
8
|
-
@v3 = 10.times.map { rand(100) }.
|
6
|
+
@v1 = 10.times.map { rand(100) }.to_numeric
|
7
|
+
@v2 = 10.times.map { rand(100) }.to_numeric
|
8
|
+
@v3 = 10.times.map { rand(100) }.to_numeric
|
9
9
|
end
|
10
10
|
should 'be the same using [] or args*' do
|
11
11
|
a1 = Statsample::Anova::OneWayWithVectors.new(@v1, @v2, @v3)
|
@@ -28,9 +28,9 @@ class StatsampleAnovaOneWayWithVectorsTestCase < Minitest::Test
|
|
28
28
|
end
|
29
29
|
end
|
30
30
|
setup do
|
31
|
-
@v1 = [3, 3, 2, 3, 6].to_vector(:
|
32
|
-
@v2 = [7, 6, 5, 6, 7].to_vector(:
|
33
|
-
@v3 = [9, 8, 9, 7, 8].to_vector(:
|
31
|
+
@v1 = [3, 3, 2, 3, 6].to_vector(:numeric)
|
32
|
+
@v2 = [7, 6, 5, 6, 7].to_vector(:numeric)
|
33
|
+
@v3 = [9, 8, 9, 7, 8].to_vector(:numeric)
|
34
34
|
@name = 'Anova testing'
|
35
35
|
@anova = Statsample::Anova::OneWayWithVectors.new(@v1, @v2, @v3, name: @name)
|
36
36
|
end
|
@@ -4,9 +4,9 @@ class StatsampleBartlettSphericityTestCase < Minitest::Test
|
|
4
4
|
include Statsample::Test
|
5
5
|
context Statsample::Test::BartlettSphericity do
|
6
6
|
setup do
|
7
|
-
@v1 = [1, 2, 3, 4, 7, 8, 9, 10, 14, 15, 20, 50, 60, 70].
|
8
|
-
@v2 = [5, 6, 11, 12, 13, 16, 17, 18, 19, 20, 30, 0, 0, 0].
|
9
|
-
@v3 = [10, 3, 20, 30, 40, 50, 80, 10, 20, 30, 40, 2, 3, 4].
|
7
|
+
@v1 = [1, 2, 3, 4, 7, 8, 9, 10, 14, 15, 20, 50, 60, 70].to_numeric
|
8
|
+
@v2 = [5, 6, 11, 12, 13, 16, 17, 18, 19, 20, 30, 0, 0, 0].to_numeric
|
9
|
+
@v3 = [10, 3, 20, 30, 40, 50, 80, 10, 20, 30, 40, 2, 3, 4].to_numeric
|
10
10
|
# KMO: 0.490
|
11
11
|
ds = { 'v1' => @v1, 'v2' => @v2, 'v3' => @v3 }.to_dataset
|
12
12
|
cor = Statsample::Bivariate.correlation_matrix(ds)
|
data/test/test_bivariate.rb
CHANGED
@@ -1,38 +1,38 @@
|
|
1
1
|
require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
2
2
|
class StatsampleBivariateTestCase < Minitest::Test
|
3
3
|
should 'method sum of squares should be correct' do
|
4
|
-
v1 = [1, 2, 3, 4, 5, 6].to_vector(:
|
5
|
-
v2 = [6, 2, 4, 10, 12, 8].to_vector(:
|
4
|
+
v1 = [1, 2, 3, 4, 5, 6].to_vector(:numeric)
|
5
|
+
v2 = [6, 2, 4, 10, 12, 8].to_vector(:numeric)
|
6
6
|
assert_equal(23.0, Statsample::Bivariate.sum_of_squares(v1, v2))
|
7
7
|
end
|
8
8
|
should_with_gsl 'return same covariance with ruby and gls implementation' do
|
9
|
-
v1 = 20.times.collect { |_a| rand }.
|
10
|
-
v2 = 20.times.collect { |_a| rand }.
|
9
|
+
v1 = 20.times.collect { |_a| rand }.to_numeric
|
10
|
+
v2 = 20.times.collect { |_a| rand }.to_numeric
|
11
11
|
assert_in_delta(Statsample::Bivariate.covariance(v1, v2), Statsample::Bivariate.covariance_slow(v1, v2), 0.001)
|
12
12
|
end
|
13
13
|
|
14
14
|
should_with_gsl 'return same correlation with ruby and gls implementation' do
|
15
|
-
v1 = 20.times.collect { |_a| rand }.
|
16
|
-
v2 = 20.times.collect { |_a| rand }.
|
15
|
+
v1 = 20.times.collect { |_a| rand }.to_numeric
|
16
|
+
v2 = 20.times.collect { |_a| rand }.to_numeric
|
17
17
|
|
18
18
|
assert_in_delta(GSL::Stats.correlation(v1.gsl, v2.gsl), Statsample::Bivariate.pearson_slow(v1, v2), 1e-10)
|
19
19
|
end
|
20
20
|
should 'return correct pearson correlation' do
|
21
|
-
v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:
|
22
|
-
v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:
|
21
|
+
v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:numeric)
|
22
|
+
v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:numeric)
|
23
23
|
assert_in_delta(0.525, Statsample::Bivariate.pearson(v1, v2), 0.001)
|
24
24
|
assert_in_delta(0.525, Statsample::Bivariate.pearson_slow(v1, v2), 0.001)
|
25
25
|
|
26
|
-
v3 = [6, 2, 1000, 1000, 5, 4, 7, 8, 4, 3, 2, nil].to_vector(:
|
27
|
-
v4 = [2, nil, nil, nil, 3, 7, 8, 6, 4, 3, 2, 500].to_vector(:
|
26
|
+
v3 = [6, 2, 1000, 1000, 5, 4, 7, 8, 4, 3, 2, nil].to_vector(:numeric)
|
27
|
+
v4 = [2, nil, nil, nil, 3, 7, 8, 6, 4, 3, 2, 500].to_vector(:numeric)
|
28
28
|
assert_in_delta(0.525, Statsample::Bivariate.pearson(v3, v4), 0.001)
|
29
29
|
# Test ruby method
|
30
30
|
v3a, v4a = Statsample.only_valid v3, v4
|
31
31
|
assert_in_delta(0.525, Statsample::Bivariate.pearson_slow(v3a, v4a), 0.001)
|
32
32
|
end
|
33
33
|
should 'return correct values for t_pearson and prop_pearson' do
|
34
|
-
v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:
|
35
|
-
v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:
|
34
|
+
v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:numeric)
|
35
|
+
v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:numeric)
|
36
36
|
r = Statsample::Bivariate::Pearson.new(v1, v2)
|
37
37
|
assert_in_delta(0.525, r.r, 0.001)
|
38
38
|
assert_in_delta(Statsample::Bivariate.t_pearson(v1, v2), r.t, 0.001)
|
@@ -40,10 +40,10 @@ class StatsampleBivariateTestCase < Minitest::Test
|
|
40
40
|
assert(r.summary.size > 0)
|
41
41
|
end
|
42
42
|
should 'return correct correlation_matrix with nils values' do
|
43
|
-
v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:
|
44
|
-
v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:
|
45
|
-
v3 = [6, 2, 1000, 1000, 5, 4, 7, 8].to_vector(:
|
46
|
-
v4 = [2, nil, nil, nil, 3, 7, 8, 6].to_vector(:
|
43
|
+
v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:numeric)
|
44
|
+
v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:numeric)
|
45
|
+
v3 = [6, 2, 1000, 1000, 5, 4, 7, 8].to_vector(:numeric)
|
46
|
+
v4 = [2, nil, nil, nil, 3, 7, 8, 6].to_vector(:numeric)
|
47
47
|
ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4 }.to_dataset
|
48
48
|
c = proc { |n1, n2| Statsample::Bivariate.pearson(n1, n2) }
|
49
49
|
expected = Matrix[[c.call(v1, v1), c.call(v1, v2), c.call(v1, v3), c.call(v1, v4)], [c.call(v2, v1), c.call(v2, v2), c.call(v2, v3), c.call(v2, v4)], [c.call(v3, v1), c.call(v3, v2), c.call(v3, v3), c.call(v3, v4)],
|
@@ -61,11 +61,11 @@ class StatsampleBivariateTestCase < Minitest::Test
|
|
61
61
|
end
|
62
62
|
should_with_gsl 'return same values for optimized and pairwise covariance matrix' do
|
63
63
|
cases = 100
|
64
|
-
v1 = Statsample::Vector.
|
65
|
-
v2 = Statsample::Vector.
|
66
|
-
v3 = Statsample::Vector.
|
67
|
-
v4 = Statsample::Vector.
|
68
|
-
v5 = Statsample::Vector.
|
64
|
+
v1 = Statsample::Vector.new_numeric(cases) { rand }
|
65
|
+
v2 = Statsample::Vector.new_numeric(cases) { rand }
|
66
|
+
v3 = Statsample::Vector.new_numeric(cases) { rand }
|
67
|
+
v4 = Statsample::Vector.new_numeric(cases) { rand }
|
68
|
+
v5 = Statsample::Vector.new_numeric(cases) { rand }
|
69
69
|
|
70
70
|
ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4, 'v5' => v5 }.to_dataset
|
71
71
|
|
@@ -76,11 +76,11 @@ class StatsampleBivariateTestCase < Minitest::Test
|
|
76
76
|
end
|
77
77
|
should_with_gsl 'return same values for optimized and pairwise correlation matrix' do
|
78
78
|
cases = 100
|
79
|
-
v1 = Statsample::Vector.
|
80
|
-
v2 = Statsample::Vector.
|
81
|
-
v3 = Statsample::Vector.
|
82
|
-
v4 = Statsample::Vector.
|
83
|
-
v5 = Statsample::Vector.
|
79
|
+
v1 = Statsample::Vector.new_numeric(cases) { rand }
|
80
|
+
v2 = Statsample::Vector.new_numeric(cases) { rand }
|
81
|
+
v3 = Statsample::Vector.new_numeric(cases) { rand }
|
82
|
+
v4 = Statsample::Vector.new_numeric(cases) { rand }
|
83
|
+
v5 = Statsample::Vector.new_numeric(cases) { rand }
|
84
84
|
|
85
85
|
ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4, 'v5' => v5 }.to_dataset
|
86
86
|
|
@@ -90,10 +90,10 @@ class StatsampleBivariateTestCase < Minitest::Test
|
|
90
90
|
assert_equal_matrix(cor_opt, cor_pw, 1e-15)
|
91
91
|
end
|
92
92
|
should 'return correct correlation_matrix without nils values' do
|
93
|
-
v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:
|
94
|
-
v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:
|
95
|
-
v3 = [6, 2, 1000, 1000, 5, 4, 7, 8].to_vector(:
|
96
|
-
v4 = [2, 4, 6, 7, 3, 7, 8, 6].to_vector(:
|
93
|
+
v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:numeric)
|
94
|
+
v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:numeric)
|
95
|
+
v3 = [6, 2, 1000, 1000, 5, 4, 7, 8].to_vector(:numeric)
|
96
|
+
v4 = [2, 4, 6, 7, 3, 7, 8, 6].to_vector(:numeric)
|
97
97
|
ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4 }.to_dataset
|
98
98
|
c = proc { |n1, n2| Statsample::Bivariate.pearson(n1, n2) }
|
99
99
|
expected = Matrix[[c.call(v1, v1), c.call(v1, v2), c.call(v1, v3), c.call(v1, v4)], [c.call(v2, v1), c.call(v2, v2), c.call(v2, v3), c.call(v2, v4)], [c.call(v3, v1), c.call(v3, v2), c.call(v3, v3), c.call(v3, v4)],
|
@@ -129,25 +129,25 @@ class StatsampleBivariateTestCase < Minitest::Test
|
|
129
129
|
end
|
130
130
|
|
131
131
|
should "return correct value for Spearman's rho" do
|
132
|
-
v1 = [86, 97, 99, 100, 101, 103, 106, 110, 112, 113].to_vector(:
|
133
|
-
v2 = [0, 20, 28, 27, 50, 29, 7, 17, 6, 12].to_vector(:
|
132
|
+
v1 = [86, 97, 99, 100, 101, 103, 106, 110, 112, 113].to_vector(:numeric)
|
133
|
+
v2 = [0, 20, 28, 27, 50, 29, 7, 17, 6, 12].to_vector(:numeric)
|
134
134
|
assert_in_delta(-0.175758, Statsample::Bivariate.spearman(v1, v2), 0.0001)
|
135
135
|
end
|
136
136
|
should 'return correct value for point_biserial correlation' do
|
137
|
-
c = [1, 3, 5, 6, 7, 100, 200, 300, 400, 300].to_vector(:
|
138
|
-
d = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0].to_vector(:
|
137
|
+
c = [1, 3, 5, 6, 7, 100, 200, 300, 400, 300].to_vector(:numeric)
|
138
|
+
d = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0].to_vector(:numeric)
|
139
139
|
assert_raises TypeError do
|
140
140
|
Statsample::Bivariate.point_biserial(c, d)
|
141
141
|
end
|
142
142
|
assert_in_delta(Statsample::Bivariate.point_biserial(d, c), Statsample::Bivariate.pearson(d, c), 0.0001)
|
143
143
|
end
|
144
144
|
should 'return correct value for tau_a and tau_b' do
|
145
|
-
v1 = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11].to_vector(:
|
146
|
-
v2 = [1, 3, 4, 5, 7, 8, 2, 9, 10, 6, 11].to_vector(:
|
145
|
+
v1 = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11].to_vector(:numeric)
|
146
|
+
v2 = [1, 3, 4, 5, 7, 8, 2, 9, 10, 6, 11].to_vector(:numeric)
|
147
147
|
assert_in_delta(0.6727, Statsample::Bivariate.tau_a(v1, v2), 0.001)
|
148
148
|
assert_in_delta(0.6727, Statsample::Bivariate.tau_b((Statsample::Crosstab.new(v1, v2).to_matrix)), 0.001)
|
149
|
-
v1 = [12, 14, 14, 17, 19, 19, 19, 19, 19, 20, 21, 21, 21, 21, 21, 22, 23, 24, 24, 24, 26, 26, 27].to_vector(:
|
150
|
-
v2 = [11, 4, 4, 2, 0, 0, 0, 0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0].to_vector(:
|
149
|
+
v1 = [12, 14, 14, 17, 19, 19, 19, 19, 19, 20, 21, 21, 21, 21, 21, 22, 23, 24, 24, 24, 26, 26, 27].to_vector(:numeric)
|
150
|
+
v2 = [11, 4, 4, 2, 0, 0, 0, 0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0].to_vector(:numeric)
|
151
151
|
assert_in_delta(-0.376201540231705, Statsample::Bivariate.tau_b(Statsample::Crosstab.new(v1, v2).to_matrix), 0.001)
|
152
152
|
end
|
153
153
|
should 'return correct value for gamma correlation' do
|
data/test/test_crosstab.rb
CHANGED
@@ -58,8 +58,8 @@ class StatsampleCrosstabTestCase < Minitest::Test
|
|
58
58
|
end
|
59
59
|
|
60
60
|
def test_crosstab_with_scale
|
61
|
-
v1 = %w(1 1 1 1 1 0 0 0 0 0).
|
62
|
-
v2 = %w(0 0 0 0 0 1 1 1 1 1).
|
61
|
+
v1 = %w(1 1 1 1 1 0 0 0 0 0).to_numeric
|
62
|
+
v2 = %w(0 0 0 0 0 1 1 1 1 1).to_numeric
|
63
63
|
ct = Statsample::Crosstab.new(v1, v2)
|
64
64
|
assert_equal(Matrix[[0, 5], [5, 0]], ct.to_matrix)
|
65
65
|
assert_nothing_raised { ct.summary }
|
data/test/test_csv.rb
CHANGED
@@ -8,11 +8,11 @@ class StatsampleCSVTestCase < Minitest::Test
|
|
8
8
|
def test_read
|
9
9
|
header = %w(id name age city a1)
|
10
10
|
data = {
|
11
|
-
'id' => [1, 2, 3, 4, 5, 6].to_vector(:
|
12
|
-
'name' => %w(Alex Claude Peter Franz George Fernand).to_vector(:
|
13
|
-
'age' => [20, 23, 25, 27, 5.5, nil].to_vector(:
|
14
|
-
'city' => ['New York', 'London', 'London', 'Paris', 'Tome', nil].to_vector(:
|
15
|
-
'a1' => ['a,b', 'b,c', 'a', nil, 'a,b,c', nil].to_vector(:
|
11
|
+
'id' => [1, 2, 3, 4, 5, 6].to_vector(:numeric),
|
12
|
+
'name' => %w(Alex Claude Peter Franz George Fernand).to_vector(:object),
|
13
|
+
'age' => [20, 23, 25, 27, 5.5, nil].to_vector(:numeric),
|
14
|
+
'city' => ['New York', 'London', 'London', 'Paris', 'Tome', nil].to_vector(:object),
|
15
|
+
'a1' => ['a,b', 'b,c', 'a', nil, 'a,b,c', nil].to_vector(:object)
|
16
16
|
}
|
17
17
|
|
18
18
|
ds_exp = Statsample::Dataset.new(data, header)
|
@@ -34,7 +34,7 @@ class StatsampleCSVTestCase < Minitest::Test
|
|
34
34
|
def test_repeated
|
35
35
|
ds = Statsample::CSV.read('test/fixtures/repeated_fields.csv')
|
36
36
|
assert_equal(%w(id name_1 age_1 city a1 name_2 age_2), ds.fields)
|
37
|
-
age = [3, 4, 5, 6, nil, 8].to_vector(:
|
37
|
+
age = [3, 4, 5, 6, nil, 8].to_vector(:numeric)
|
38
38
|
assert_equal(age, ds['age_2'])
|
39
39
|
end
|
40
40
|
|
data/test/test_dataset.rb
CHANGED
@@ -58,9 +58,9 @@ class StatsampleDatasetTestCase < Minitest::Test
|
|
58
58
|
end
|
59
59
|
|
60
60
|
def test_merge
|
61
|
-
a = [1, 2, 3].
|
61
|
+
a = [1, 2, 3].to_numeric
|
62
62
|
b = [3, 4, 5].to_vector
|
63
|
-
c = [4, 5, 6].
|
63
|
+
c = [4, 5, 6].to_numeric
|
64
64
|
d = [7, 8, 9].to_vector
|
65
65
|
e = [10, 20, 30].to_vector
|
66
66
|
ds1 = { 'a' => a, 'b' => b }.to_dataset
|
@@ -117,51 +117,51 @@ class StatsampleDatasetTestCase < Minitest::Test
|
|
117
117
|
end
|
118
118
|
|
119
119
|
def test_vector_by_calculation
|
120
|
-
a1 = [1, 2, 3, 4, 5, 6, 7].to_vector(:
|
121
|
-
a2 = [10, 20, 30, 40, 50, 60, 70].to_vector(:
|
122
|
-
a3 = [100, 200, 300, 400, 500, 600, 700].to_vector(:
|
120
|
+
a1 = [1, 2, 3, 4, 5, 6, 7].to_vector(:numeric)
|
121
|
+
a2 = [10, 20, 30, 40, 50, 60, 70].to_vector(:numeric)
|
122
|
+
a3 = [100, 200, 300, 400, 500, 600, 700].to_vector(:numeric)
|
123
123
|
ds = { 'a1' => a1, 'a2' => a2, 'a3' => a3 }.to_dataset
|
124
124
|
total = ds.vector_by_calculation {|row|
|
125
125
|
row['a1'] + row['a2'] + row['a3']
|
126
126
|
}
|
127
|
-
expected = [111, 222, 333, 444, 555, 666, 777].to_vector(:
|
127
|
+
expected = [111, 222, 333, 444, 555, 666, 777].to_vector(:numeric)
|
128
128
|
assert_equal(expected, total)
|
129
129
|
end
|
130
130
|
|
131
131
|
def test_vector_sum
|
132
|
-
a1 = [1, 2, 3, 4, 5, nil].to_vector(:
|
133
|
-
a2 = [10, 10, 20, 20, 20, 30].to_vector(:
|
134
|
-
b1 = [nil, 1, 1, 1, 1, 2].to_vector(:
|
135
|
-
b2 = [2, 2, 2, nil, 2, 3].to_vector(:
|
132
|
+
a1 = [1, 2, 3, 4, 5, nil].to_vector(:numeric)
|
133
|
+
a2 = [10, 10, 20, 20, 20, 30].to_vector(:numeric)
|
134
|
+
b1 = [nil, 1, 1, 1, 1, 2].to_vector(:numeric)
|
135
|
+
b2 = [2, 2, 2, nil, 2, 3].to_vector(:numeric)
|
136
136
|
ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2 }.to_dataset
|
137
137
|
total = ds.vector_sum
|
138
138
|
a = ds.vector_sum(%w(a1 a2))
|
139
139
|
b = ds.vector_sum(%w(b1 b2))
|
140
|
-
expected_a = [11, 12, 23, 24, 25, nil].to_vector(:
|
141
|
-
expected_b = [nil, 3, 3, nil, 3, 5].to_vector(:
|
142
|
-
expected_total = [nil, 15, 26, nil, 28, nil].to_vector(:
|
140
|
+
expected_a = [11, 12, 23, 24, 25, nil].to_vector(:numeric)
|
141
|
+
expected_b = [nil, 3, 3, nil, 3, 5].to_vector(:numeric)
|
142
|
+
expected_total = [nil, 15, 26, nil, 28, nil].to_vector(:numeric)
|
143
143
|
assert_equal(expected_a, a)
|
144
144
|
assert_equal(expected_b, b)
|
145
145
|
assert_equal(expected_total, total)
|
146
146
|
end
|
147
147
|
|
148
148
|
def test_vector_missing_values
|
149
|
-
a1 = [1, nil, 3, 4, 5, nil].to_vector(:
|
150
|
-
a2 = [10, nil, 20, 20, 20, 30].to_vector(:
|
151
|
-
b1 = [nil, nil, 1, 1, 1, 2].to_vector(:
|
152
|
-
b2 = [2, 2, 2, nil, 2, 3].to_vector(:
|
153
|
-
c = [nil, 2, 4, 2, 2, 2].to_vector(:
|
149
|
+
a1 = [1, nil, 3, 4, 5, nil].to_vector(:numeric)
|
150
|
+
a2 = [10, nil, 20, 20, 20, 30].to_vector(:numeric)
|
151
|
+
b1 = [nil, nil, 1, 1, 1, 2].to_vector(:numeric)
|
152
|
+
b2 = [2, 2, 2, nil, 2, 3].to_vector(:numeric)
|
153
|
+
c = [nil, 2, 4, 2, 2, 2].to_vector(:numeric)
|
154
154
|
ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
|
155
|
-
mva = [2, 3, 0, 1, 0, 1].to_vector(:
|
155
|
+
mva = [2, 3, 0, 1, 0, 1].to_vector(:numeric)
|
156
156
|
assert_equal(mva, ds.vector_missing_values)
|
157
157
|
end
|
158
158
|
|
159
159
|
def test_has_missing_values
|
160
|
-
a1 = [1, nil, 3, 4, 5, nil].to_vector(:
|
161
|
-
a2 = [10, nil, 20, 20, 20, 30].to_vector(:
|
162
|
-
b1 = [nil, nil, 1, 1, 1, 2].to_vector(:
|
163
|
-
b2 = [2, 2, 2, nil, 2, 3].to_vector(:
|
164
|
-
c = [nil, 2, 4, 2, 2, 2].to_vector(:
|
160
|
+
a1 = [1, nil, 3, 4, 5, nil].to_vector(:numeric)
|
161
|
+
a2 = [10, nil, 20, 20, 20, 30].to_vector(:numeric)
|
162
|
+
b1 = [nil, nil, 1, 1, 1, 2].to_vector(:numeric)
|
163
|
+
b2 = [2, 2, 2, nil, 2, 3].to_vector(:numeric)
|
164
|
+
c = [nil, 2, 4, 2, 2, 2].to_vector(:numeric)
|
165
165
|
ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
|
166
166
|
assert(ds.has_missing_data?)
|
167
167
|
clean = ds.dup_only_valid
|
@@ -169,31 +169,31 @@ class StatsampleDatasetTestCase < Minitest::Test
|
|
169
169
|
end
|
170
170
|
|
171
171
|
def test_vector_count_characters
|
172
|
-
a1 = [1, 'abcde', 3, 4, 5, nil].to_vector(:
|
173
|
-
a2 = [10, 20.3, 20, 20, 20, 30].to_vector(:
|
174
|
-
b1 = [nil, '343434', 1, 1, 1, 2].to_vector(:
|
175
|
-
b2 = [2, 2, 2, nil, 2, 3].to_vector(:
|
176
|
-
c = [nil, 2, 'This is a nice example', 2, 2, 2].to_vector(:
|
172
|
+
a1 = [1, 'abcde', 3, 4, 5, nil].to_vector(:numeric)
|
173
|
+
a2 = [10, 20.3, 20, 20, 20, 30].to_vector(:numeric)
|
174
|
+
b1 = [nil, '343434', 1, 1, 1, 2].to_vector(:numeric)
|
175
|
+
b2 = [2, 2, 2, nil, 2, 3].to_vector(:numeric)
|
176
|
+
c = [nil, 2, 'This is a nice example', 2, 2, 2].to_vector(:numeric)
|
177
177
|
ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
|
178
|
-
exp = [4, 17, 27, 5, 6, 5].to_vector(:
|
178
|
+
exp = [4, 17, 27, 5, 6, 5].to_vector(:numeric)
|
179
179
|
assert_equal(exp, ds.vector_count_characters)
|
180
180
|
end
|
181
181
|
|
182
182
|
def test_vector_mean
|
183
|
-
a1 = [1, 2, 3, 4, 5, nil].to_vector(:
|
184
|
-
a2 = [10, 10, 20, 20, 20, 30].to_vector(:
|
185
|
-
b1 = [nil, 1, 1, 1, 1, 2].to_vector(:
|
186
|
-
b2 = [2, 2, 2, nil, 2, 3].to_vector(:
|
187
|
-
c = [nil, 2, 4, 2, 2, 2].to_vector(:
|
183
|
+
a1 = [1, 2, 3, 4, 5, nil].to_vector(:numeric)
|
184
|
+
a2 = [10, 10, 20, 20, 20, 30].to_vector(:numeric)
|
185
|
+
b1 = [nil, 1, 1, 1, 1, 2].to_vector(:numeric)
|
186
|
+
b2 = [2, 2, 2, nil, 2, 3].to_vector(:numeric)
|
187
|
+
c = [nil, 2, 4, 2, 2, 2].to_vector(:numeric)
|
188
188
|
ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
|
189
189
|
total = ds.vector_mean
|
190
190
|
a = ds.vector_mean(%w(a1 a2), 1)
|
191
191
|
b = ds.vector_mean(%w(b1 b2), 1)
|
192
192
|
c = ds.vector_mean(%w(b1 b2 c), 1)
|
193
|
-
expected_a = [5.5, 6, 11.5, 12, 12.5, 30].to_vector(:
|
194
|
-
expected_b = [2, 1.5, 1.5, 1, 1.5, 2.5].to_vector(:
|
195
|
-
expected_c = [nil, 5.0 / 3, 7.0 / 3, 1.5, 5.0 / 3, 7.0 / 3].to_vector(:
|
196
|
-
expected_total = [nil, 3.4, 6, nil, 6.0, nil].to_vector(:
|
193
|
+
expected_a = [5.5, 6, 11.5, 12, 12.5, 30].to_vector(:numeric)
|
194
|
+
expected_b = [2, 1.5, 1.5, 1, 1.5, 2.5].to_vector(:numeric)
|
195
|
+
expected_c = [nil, 5.0 / 3, 7.0 / 3, 1.5, 5.0 / 3, 7.0 / 3].to_vector(:numeric)
|
196
|
+
expected_total = [nil, 3.4, 6, nil, 6.0, nil].to_vector(:numeric)
|
197
197
|
assert_equal(expected_a, a)
|
198
198
|
assert_equal(expected_b, b)
|
199
199
|
assert_equal(expected_c, c)
|
@@ -210,9 +210,9 @@ class StatsampleDatasetTestCase < Minitest::Test
|
|
210
210
|
end
|
211
211
|
|
212
212
|
def test_recode
|
213
|
-
@ds['age'].type = :
|
213
|
+
@ds['age'].type = :numeric
|
214
214
|
@ds.recode!('age') { |c| c['id'] * 2 }
|
215
|
-
expected = [2, 4, 6, 8, 10].to_vector(:
|
215
|
+
expected = [2, 4, 6, 8, 10].to_vector(:numeric)
|
216
216
|
assert_equal(expected, @ds['age'])
|
217
217
|
end
|
218
218
|
|
@@ -231,8 +231,8 @@ class StatsampleDatasetTestCase < Minitest::Test
|
|
231
231
|
end
|
232
232
|
|
233
233
|
def test_change_type
|
234
|
-
@ds.col('age').type = :
|
235
|
-
assert_equal(:
|
234
|
+
@ds.col('age').type = :numeric
|
235
|
+
assert_equal(:numeric, @ds.col('age').type)
|
236
236
|
end
|
237
237
|
|
238
238
|
def test_split_by_separator_recode
|
@@ -255,13 +255,13 @@ class StatsampleDatasetTestCase < Minitest::Test
|
|
255
255
|
end
|
256
256
|
|
257
257
|
def test_percentiles
|
258
|
-
v1 = (1..100).to_a.
|
258
|
+
v1 = (1..100).to_a.to_numeric
|
259
259
|
assert_equal(50.5, v1.median)
|
260
260
|
assert_equal(25.5, v1.percentil(25))
|
261
|
-
v2 = (1..99).to_a.
|
261
|
+
v2 = (1..99).to_a.to_numeric
|
262
262
|
assert_equal(50, v2.median)
|
263
263
|
assert_equal(25, v2.percentil(25))
|
264
|
-
v3 = (1..50).to_a.
|
264
|
+
v3 = (1..50).to_a.to_numeric
|
265
265
|
assert_equal(25.5, v3.median)
|
266
266
|
assert_equal(13, v3.percentil(25))
|
267
267
|
end
|
@@ -336,7 +336,7 @@ class StatsampleDatasetTestCase < Minitest::Test
|
|
336
336
|
|
337
337
|
assert_equal(ds1.fields, ds2.fields)
|
338
338
|
assert_not_same(ds1.fields, ds2.fields)
|
339
|
-
ds1['v1'].type = :
|
339
|
+
ds1['v1'].type = :numeric
|
340
340
|
# dup partial
|
341
341
|
ds3 = ds1.dup('v1')
|
342
342
|
ds_exp = Statsample::Dataset.new({ 'v1' => v1 }, %w(v1))
|
@@ -355,7 +355,7 @@ class StatsampleDatasetTestCase < Minitest::Test
|
|
355
355
|
assert_not_equal(ds1['v1'], ds3['v1'])
|
356
356
|
assert_equal([], ds3['v1'].data)
|
357
357
|
assert_equal([], ds3['v2'].data)
|
358
|
-
assert_equal(:
|
358
|
+
assert_equal(:numeric, ds3['v1'].type)
|
359
359
|
assert_equal(ds1.fields, ds2.fields)
|
360
360
|
assert_not_same(ds1.fields, ds2.fields)
|
361
361
|
end
|
@@ -368,9 +368,9 @@ class StatsampleDatasetTestCase < Minitest::Test
|
|
368
368
|
end
|
369
369
|
|
370
370
|
def test_each_array_with_nils
|
371
|
-
v1 = [1, -99, 3, 4, 'na'].to_vector(:
|
372
|
-
v2 = [5, 6, -99, 8, 20].to_vector(:
|
373
|
-
v3 = [9, 10, 11, 12, 20].to_vector(:
|
371
|
+
v1 = [1, -99, 3, 4, 'na'].to_vector(:numeric, missing_values: [-99, 'na'])
|
372
|
+
v2 = [5, 6, -99, 8, 20].to_vector(:numeric, missing_values: [-99])
|
373
|
+
v3 = [9, 10, 11, 12, 20].to_vector(:numeric, missing_values: [-99])
|
374
374
|
ds1 = Statsample::Dataset.new('v1' => v1, 'v2' => v2, 'v3' => v3)
|
375
375
|
ds2 = ds1.dup_empty
|
376
376
|
ds1.each_array_with_nils {|row|
|
@@ -382,40 +382,40 @@ class StatsampleDatasetTestCase < Minitest::Test
|
|
382
382
|
end
|
383
383
|
|
384
384
|
def test_dup_only_valid
|
385
|
-
v1 = [1, nil, 3, 4].to_vector(:
|
386
|
-
v2 = [5, 6, nil, 8].to_vector(:
|
387
|
-
v3 = [9, 10, 11, 12].to_vector(:
|
385
|
+
v1 = [1, nil, 3, 4].to_vector(:numeric)
|
386
|
+
v2 = [5, 6, nil, 8].to_vector(:numeric)
|
387
|
+
v3 = [9, 10, 11, 12].to_vector(:numeric)
|
388
388
|
ds1 = Statsample::Dataset.new('v1' => v1, 'v2' => v2, 'v3' => v3)
|
389
389
|
ds2 = ds1.dup_only_valid
|
390
|
-
expected = Statsample::Dataset.new('v1' => [1, 4].to_vector(:
|
390
|
+
expected = Statsample::Dataset.new('v1' => [1, 4].to_vector(:numeric), 'v2' => [5, 8].to_vector(:numeric), 'v3' => [9, 12].to_vector(:numeric))
|
391
391
|
assert_equal(expected, ds2)
|
392
392
|
assert_equal(expected.vectors.values, Statsample.only_valid(v1, v2, v3))
|
393
|
-
expected_partial = Statsample::Dataset.new('v1' => [1, 3, 4].to_vector(:
|
393
|
+
expected_partial = Statsample::Dataset.new('v1' => [1, 3, 4].to_vector(:numeric), 'v3' => [9, 11, 12].to_vector(:numeric))
|
394
394
|
assert_equal(expected_partial, ds1.dup_only_valid(%w(v1 v3)))
|
395
395
|
end
|
396
396
|
|
397
397
|
def test_filter
|
398
|
-
@ds['age'].type = :
|
398
|
+
@ds['age'].type = :numeric
|
399
399
|
filtered = @ds.filter { |c| c['id'] == 2 or c['id'] == 4 }
|
400
|
-
expected = Statsample::Dataset.new({ 'id' => Statsample::Vector.new([2, 4]), 'name' => Statsample::Vector.new(%w(Claude Franz)), 'age' => Statsample::Vector.new([23, 27], :
|
400
|
+
expected = Statsample::Dataset.new({ 'id' => Statsample::Vector.new([2, 4]), 'name' => Statsample::Vector.new(%w(Claude Franz)), 'age' => Statsample::Vector.new([23, 27], :numeric),
|
401
401
|
'city' => Statsample::Vector.new(%w(London Paris)),
|
402
402
|
'a1' => Statsample::Vector.new(['b,c', nil]) }, %w(id name age city a1))
|
403
403
|
assert_equal(expected, filtered)
|
404
404
|
end
|
405
405
|
|
406
406
|
def test_filter_field
|
407
|
-
@ds['age'].type = :
|
407
|
+
@ds['age'].type = :numeric
|
408
408
|
filtered = @ds.filter_field('id') { |c| c['id'] == 2 or c['id'] == 4 }
|
409
409
|
expected = [2, 4].to_vector
|
410
410
|
assert_equal(expected, filtered)
|
411
411
|
end
|
412
412
|
|
413
413
|
def test_verify
|
414
|
-
name = %w(r1 r2 r3 r4).to_vector(:
|
415
|
-
v1 = [1, 2, 3, 4].to_vector(:
|
416
|
-
v2 = [4, 3, 2, 1].to_vector(:
|
417
|
-
v3 = [10, 20, 30, 40].to_vector(:
|
418
|
-
v4 = %w(a b a b).to_vector(:
|
414
|
+
name = %w(r1 r2 r3 r4).to_vector(:object)
|
415
|
+
v1 = [1, 2, 3, 4].to_vector(:numeric)
|
416
|
+
v2 = [4, 3, 2, 1].to_vector(:numeric)
|
417
|
+
v3 = [10, 20, 30, 40].to_vector(:numeric)
|
418
|
+
v4 = %w(a b a b).to_vector(:object)
|
419
419
|
ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4, 'id' => name }.to_dataset
|
420
420
|
ds.fields = %w(v1 v2 v3 v4 id)
|
421
421
|
# Correct
|
@@ -432,14 +432,14 @@ class StatsampleDatasetTestCase < Minitest::Test
|
|
432
432
|
end
|
433
433
|
|
434
434
|
def test_compute_operation
|
435
|
-
v1 = [1, 2, 3, 4].to_vector(:
|
436
|
-
v2 = [4, 3, 2, 1].to_vector(:
|
437
|
-
v3 = [10, 20, 30, 40].to_vector(:
|
438
|
-
|
439
|
-
vsum = [1 + 4 + 10.0, 2 + 3 + 20.0, 3 + 2 + 30.0, 4 + 1 + 40.0].to_vector(:
|
440
|
-
vmult = [1 * 4, 2 * 3, 3 * 2, 4 * 1].to_vector(:
|
435
|
+
v1 = [1, 2, 3, 4].to_vector(:numeric)
|
436
|
+
v2 = [4, 3, 2, 1].to_vector(:numeric)
|
437
|
+
v3 = [10, 20, 30, 40].to_vector(:numeric)
|
438
|
+
vnumeric = [1.quo(2), 1, 3.quo(2), 2].to_vector(:numeric)
|
439
|
+
vsum = [1 + 4 + 10.0, 2 + 3 + 20.0, 3 + 2 + 30.0, 4 + 1 + 40.0].to_vector(:numeric)
|
440
|
+
vmult = [1 * 4, 2 * 3, 3 * 2, 4 * 1].to_vector(:numeric)
|
441
441
|
ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3 }.to_dataset
|
442
|
-
assert_equal(
|
442
|
+
assert_equal(vnumeric, ds.compute('v1/2'))
|
443
443
|
assert_equal(vsum, ds.compute('v1+v2+v3'))
|
444
444
|
assert_equal(vmult, ds.compute('v1*v2'))
|
445
445
|
end
|
@@ -447,15 +447,15 @@ class StatsampleDatasetTestCase < Minitest::Test
|
|
447
447
|
def test_crosstab_with_asignation
|
448
448
|
v1 = %w(a a a b b b c c c).to_vector
|
449
449
|
v2 = %w(a b c a b c a b c).to_vector
|
450
|
-
v3 = %w(0 1 0 0 1 1 0 0 1).
|
450
|
+
v3 = %w(0 1 0 0 1 1 0 0 1).to_numeric
|
451
451
|
ds = Statsample::Dataset.crosstab_by_asignation(v1, v2, v3)
|
452
|
-
assert_equal(:
|
453
|
-
assert_equal(:
|
454
|
-
assert_equal(:
|
452
|
+
assert_equal(:object, ds['_id'].type)
|
453
|
+
assert_equal(:numeric, ds['a'].type)
|
454
|
+
assert_equal(:numeric, ds['b'].type)
|
455
455
|
ev_id = %w(a b c).to_vector
|
456
|
-
ev_a = %w(0 0 0).
|
457
|
-
ev_b = %w(1 1 0).
|
458
|
-
ev_c = %w(0 1 1).
|
456
|
+
ev_a = %w(0 0 0).to_numeric
|
457
|
+
ev_b = %w(1 1 0).to_numeric
|
458
|
+
ev_c = %w(0 1 1).to_numeric
|
459
459
|
ds2 = { '_id' => ev_id, 'a' => ev_a, 'b' => ev_b, 'c' => ev_c }.to_dataset
|
460
460
|
assert_equal(ds, ds2)
|
461
461
|
end
|
@@ -472,7 +472,7 @@ class StatsampleDatasetTestCase < Minitest::Test
|
|
472
472
|
ids = %w(1 1 2 2 2).to_vector
|
473
473
|
colors = %w(red blue green orange white).to_vector
|
474
474
|
values = [10, 20, 15, 30, 20].to_vector
|
475
|
-
col_ids = [1, 2, 1, 2, 3].
|
475
|
+
col_ids = [1, 2, 1, 2, 3].to_numeric
|
476
476
|
ds_expected = { 'id' => ids, '_col_id' => col_ids, 'color' => colors, 'value' => values }.to_dataset(%w(id _col_id color value))
|
477
477
|
assert_equal(ds_expected, ds.one_to_many(%w(id), 'car_%v%n'))
|
478
478
|
end
|