statsample 1.4.3 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/History.txt +8 -0
- data/benchmarks/correlation_matrix_15_variables.rb +1 -1
- data/benchmarks/correlation_matrix_5_variables.rb +1 -1
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +2 -2
- data/examples/dataset.rb +2 -2
- data/examples/icc.rb +1 -1
- data/examples/levene.rb +2 -2
- data/examples/parallel_analysis.rb +1 -1
- data/examples/u_test.rb +2 -2
- data/examples/vector.rb +1 -1
- data/examples/velicer_map_test.rb +1 -1
- data/lib/statsample.rb +30 -4
- data/lib/statsample/anova/oneway.rb +3 -3
- data/lib/statsample/anova/twoway.rb +3 -3
- data/lib/statsample/bivariate.rb +7 -7
- data/lib/statsample/bivariate/pearson.rb +2 -2
- data/lib/statsample/converter/csv.rb +1 -1
- data/lib/statsample/converters.rb +7 -7
- data/lib/statsample/dataset.rb +8 -8
- data/lib/statsample/dominanceanalysis.rb +4 -4
- data/lib/statsample/dominanceanalysis/bootstrap.rb +8 -8
- data/lib/statsample/factor.rb +2 -4
- data/lib/statsample/factor/map.rb +2 -1
- data/lib/statsample/factor/parallelanalysis.rb +2 -2
- data/lib/statsample/factor/pca.rb +2 -2
- data/lib/statsample/factor/principalaxis.rb +2 -2
- data/lib/statsample/graph/boxplot.rb +4 -4
- data/lib/statsample/graph/histogram.rb +2 -2
- data/lib/statsample/graph/scatterplot.rb +4 -4
- data/lib/statsample/matrix.rb +20 -6
- data/lib/statsample/regression.rb +2 -2
- data/lib/statsample/regression/multiple.rb +3 -3
- data/lib/statsample/regression/multiple/alglibengine.rb +5 -5
- data/lib/statsample/regression/multiple/baseengine.rb +3 -3
- data/lib/statsample/regression/multiple/gslengine.rb +5 -5
- data/lib/statsample/regression/multiple/rubyengine.rb +4 -4
- data/lib/statsample/reliability/icc.rb +1 -1
- data/lib/statsample/reliability/multiscaleanalysis.rb +4 -4
- data/lib/statsample/reliability/scaleanalysis.rb +6 -6
- data/lib/statsample/reliability/skillscaleanalysis.rb +1 -1
- data/lib/statsample/resample.rb +1 -1
- data/lib/statsample/shorthand.rb +1 -1
- data/lib/statsample/test/bartlettsphericity.rb +1 -1
- data/lib/statsample/test/levene.rb +4 -4
- data/lib/statsample/test/t.rb +3 -3
- data/lib/statsample/test/umannwhitney.rb +2 -2
- data/lib/statsample/vector.rb +103 -80
- data/lib/statsample/vector/gsl.rb +16 -16
- data/lib/statsample/version.rb +1 -1
- data/test/test_analysis.rb +1 -1
- data/test/test_anova_contrast.rb +4 -4
- data/test/test_anovatwowaywithdataset.rb +1 -1
- data/test/test_anovawithvectors.rb +6 -6
- data/test/test_awesome_print_bug.rb +1 -1
- data/test/test_bartlettsphericity.rb +3 -3
- data/test/test_bivariate.rb +38 -38
- data/test/test_crosstab.rb +2 -2
- data/test/test_csv.rb +6 -6
- data/test/test_dataset.rb +79 -79
- data/test/test_factor.rb +55 -49
- data/test/test_factor_pa.rb +4 -4
- data/test/test_ggobi.rb +3 -3
- data/test/test_gsl.rb +3 -3
- data/test/test_histogram.rb +3 -3
- data/test/test_matrix.rb +5 -5
- data/test/test_multiset.rb +19 -19
- data/test/test_regression.rb +27 -27
- data/test/test_reliability.rb +14 -14
- data/test/test_reliability_icc.rb +7 -7
- data/test/test_reliability_skillscale.rb +6 -6
- data/test/test_resample.rb +1 -1
- data/test/test_rserve_extension.rb +4 -4
- data/test/test_statistics.rb +5 -5
- data/test/test_stest.rb +8 -8
- data/test/test_stratified.rb +3 -3
- data/test/test_test_t.rb +5 -5
- data/test/test_umannwhitney.rb +2 -2
- data/test/test_vector.rb +153 -119
- data/test/test_wilcoxonsignedrank.rb +4 -4
- data/test/test_xls.rb +6 -6
- metadata +3 -53
@@ -18,7 +18,7 @@ module Statsample
|
|
18
18
|
end
|
19
19
|
|
20
20
|
def gsl
|
21
|
-
@gsl||=GSL::Vector.alloc(@
|
21
|
+
@gsl||=GSL::Vector.alloc(@numeric_data) if @numeric_data.size>0
|
22
22
|
end
|
23
23
|
|
24
24
|
alias :to_gsl :gsl
|
@@ -26,7 +26,7 @@ module Statsample
|
|
26
26
|
if flawed?
|
27
27
|
vector_standarized_compute_ruby(m,sd)
|
28
28
|
else
|
29
|
-
gsl.collect {|x| (x.to_f - m).quo(sd)}.
|
29
|
+
gsl.collect {|x| (x.to_f - m).quo(sd)}.to_numeric
|
30
30
|
end
|
31
31
|
end
|
32
32
|
|
@@ -34,20 +34,20 @@ module Statsample
|
|
34
34
|
if flawed?
|
35
35
|
vector_centered_compute_ruby(m)
|
36
36
|
else
|
37
|
-
gsl.collect {|x| (x.to_f - m)}.
|
37
|
+
gsl.collect {|x| (x.to_f - m)}.to_numeric
|
38
38
|
end
|
39
39
|
end
|
40
40
|
def sample_with_replacement(sample=1)
|
41
|
-
if(@type!=:
|
41
|
+
if(@type!=:numeric)
|
42
42
|
sample_with_replacement_ruby(sample)
|
43
43
|
else
|
44
44
|
r = GSL::Rng.alloc(GSL::Rng::MT19937,rand(10000))
|
45
|
-
Statsample::Vector.new(r.sample(gsl, sample).to_a,:
|
45
|
+
Statsample::Vector.new(r.sample(gsl, sample).to_a,:numeric)
|
46
46
|
end
|
47
47
|
end
|
48
48
|
|
49
49
|
def sample_without_replacement(sample=1)
|
50
|
-
if(@type!=:
|
50
|
+
if(@type!=:numeric)
|
51
51
|
sample_without_replacement_ruby(sample)
|
52
52
|
else
|
53
53
|
r = GSL::Rng.alloc(GSL::Rng::MT19937,rand(10000))
|
@@ -55,50 +55,50 @@ module Statsample
|
|
55
55
|
end
|
56
56
|
end
|
57
57
|
def median
|
58
|
-
if @type!=:
|
58
|
+
if @type!=:numeric
|
59
59
|
median_ruby
|
60
60
|
else
|
61
|
-
sorted=GSL::Vector.alloc(@
|
61
|
+
sorted=GSL::Vector.alloc(@numeric_data.sort)
|
62
62
|
GSL::Stats::median_from_sorted_data(sorted)
|
63
63
|
end
|
64
64
|
end
|
65
65
|
|
66
66
|
def sum
|
67
|
-
check_type :
|
67
|
+
check_type :numeric
|
68
68
|
gsl.nil? ? nil : gsl.sum
|
69
69
|
end
|
70
70
|
def mean
|
71
|
-
check_type :
|
71
|
+
check_type :numeric
|
72
72
|
gsl.nil? ? nil : gsl.mean
|
73
73
|
end
|
74
74
|
def variance_sample(m=nil)
|
75
|
-
check_type :
|
75
|
+
check_type :numeric
|
76
76
|
m||=mean
|
77
77
|
gsl.nil? ? nil : gsl.variance_m
|
78
78
|
end
|
79
79
|
|
80
80
|
def standard_deviation_sample(m=nil)
|
81
|
-
check_type :
|
81
|
+
check_type :numeric
|
82
82
|
m||=mean
|
83
83
|
gsl.nil? ? nil : gsl.sd(m)
|
84
84
|
end
|
85
85
|
|
86
86
|
def variance_population(m=nil) # :nodoc:
|
87
|
-
check_type :
|
87
|
+
check_type :numeric
|
88
88
|
m||=mean
|
89
89
|
gsl.nil? ? nil : gsl.variance_with_fixed_mean(m)
|
90
90
|
end
|
91
91
|
def standard_deviation_population(m=nil) # :nodoc:
|
92
|
-
check_type :
|
92
|
+
check_type :numeric
|
93
93
|
m||=mean
|
94
94
|
gsl.nil? ? nil : gsl.sd_with_fixed_mean(m)
|
95
95
|
end
|
96
96
|
def skew # :nodoc:
|
97
|
-
check_type :
|
97
|
+
check_type :numeric
|
98
98
|
gsl.nil? ? nil : gsl.skew
|
99
99
|
end
|
100
100
|
def kurtosis # :nodoc:
|
101
|
-
check_type :
|
101
|
+
check_type :numeric
|
102
102
|
gsl.nil? ? nil : gsl.kurtosis
|
103
103
|
end
|
104
104
|
end
|
data/lib/statsample/version.rb
CHANGED
data/test/test_analysis.rb
CHANGED
@@ -39,7 +39,7 @@ class StatsampleAnalysisTestCase < Minitest::Test
|
|
39
39
|
should 'to_text returns the same as a normal ReportBuilder object' do
|
40
40
|
rb = ReportBuilder.new(name: :test)
|
41
41
|
section = ReportBuilder::Section.new(name: 'first')
|
42
|
-
a = [1, 2, 3].
|
42
|
+
a = [1, 2, 3].to_numeric
|
43
43
|
section.add('first')
|
44
44
|
section.add(a)
|
45
45
|
rb.add(section)
|
data/test/test_anova_contrast.rb
CHANGED
@@ -2,10 +2,10 @@ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
|
2
2
|
class StatsampleAnovaContrastTestCase < Minitest::Test
|
3
3
|
context(Statsample::Anova::Contrast) do
|
4
4
|
setup do
|
5
|
-
constant = [12, 13, 11, 12, 12].
|
6
|
-
frequent = [9, 10, 9, 13, 14].
|
7
|
-
infrequent = [15, 16, 17, 16, 16].
|
8
|
-
never = [17, 18, 12, 18, 20].
|
5
|
+
constant = [12, 13, 11, 12, 12].to_numeric
|
6
|
+
frequent = [9, 10, 9, 13, 14].to_numeric
|
7
|
+
infrequent = [15, 16, 17, 16, 16].to_numeric
|
8
|
+
never = [17, 18, 12, 18, 20].to_numeric
|
9
9
|
@vectors = [constant, frequent, infrequent, never]
|
10
10
|
@c = Statsample::Anova::Contrast.new(vectors: @vectors)
|
11
11
|
end
|
@@ -4,7 +4,7 @@ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
|
4
4
|
class StatsampleAnovaTwoWayWithVectorsTestCase < Minitest::Test
|
5
5
|
context(Statsample::Anova::TwoWayWithVectors) do
|
6
6
|
setup do
|
7
|
-
@pa = [5, 4, 3, 4, 2, 18, 19, 14, 12, 15, 6, 7, 5, 8, 4, 6, 9, 5, 9, 3].
|
7
|
+
@pa = [5, 4, 3, 4, 2, 18, 19, 14, 12, 15, 6, 7, 5, 8, 4, 6, 9, 5, 9, 3].to_numeric
|
8
8
|
@pa.name = 'Passive Avoidance'
|
9
9
|
@a = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1].to_vector
|
10
10
|
@a.labels = { 0 => '0%', 1 => '35%' }
|
@@ -3,9 +3,9 @@ class StatsampleAnovaOneWayWithVectorsTestCase < Minitest::Test
|
|
3
3
|
context(Statsample::Anova::OneWayWithVectors) do
|
4
4
|
context('when initializing') do
|
5
5
|
setup do
|
6
|
-
@v1 = 10.times.map { rand(100) }.
|
7
|
-
@v2 = 10.times.map { rand(100) }.
|
8
|
-
@v3 = 10.times.map { rand(100) }.
|
6
|
+
@v1 = 10.times.map { rand(100) }.to_numeric
|
7
|
+
@v2 = 10.times.map { rand(100) }.to_numeric
|
8
|
+
@v3 = 10.times.map { rand(100) }.to_numeric
|
9
9
|
end
|
10
10
|
should 'be the same using [] or args*' do
|
11
11
|
a1 = Statsample::Anova::OneWayWithVectors.new(@v1, @v2, @v3)
|
@@ -28,9 +28,9 @@ class StatsampleAnovaOneWayWithVectorsTestCase < Minitest::Test
|
|
28
28
|
end
|
29
29
|
end
|
30
30
|
setup do
|
31
|
-
@v1 = [3, 3, 2, 3, 6].to_vector(:
|
32
|
-
@v2 = [7, 6, 5, 6, 7].to_vector(:
|
33
|
-
@v3 = [9, 8, 9, 7, 8].to_vector(:
|
31
|
+
@v1 = [3, 3, 2, 3, 6].to_vector(:numeric)
|
32
|
+
@v2 = [7, 6, 5, 6, 7].to_vector(:numeric)
|
33
|
+
@v3 = [9, 8, 9, 7, 8].to_vector(:numeric)
|
34
34
|
@name = 'Anova testing'
|
35
35
|
@anova = Statsample::Anova::OneWayWithVectors.new(@v1, @v2, @v3, name: @name)
|
36
36
|
end
|
@@ -4,9 +4,9 @@ class StatsampleBartlettSphericityTestCase < Minitest::Test
|
|
4
4
|
include Statsample::Test
|
5
5
|
context Statsample::Test::BartlettSphericity do
|
6
6
|
setup do
|
7
|
-
@v1 = [1, 2, 3, 4, 7, 8, 9, 10, 14, 15, 20, 50, 60, 70].
|
8
|
-
@v2 = [5, 6, 11, 12, 13, 16, 17, 18, 19, 20, 30, 0, 0, 0].
|
9
|
-
@v3 = [10, 3, 20, 30, 40, 50, 80, 10, 20, 30, 40, 2, 3, 4].
|
7
|
+
@v1 = [1, 2, 3, 4, 7, 8, 9, 10, 14, 15, 20, 50, 60, 70].to_numeric
|
8
|
+
@v2 = [5, 6, 11, 12, 13, 16, 17, 18, 19, 20, 30, 0, 0, 0].to_numeric
|
9
|
+
@v3 = [10, 3, 20, 30, 40, 50, 80, 10, 20, 30, 40, 2, 3, 4].to_numeric
|
10
10
|
# KMO: 0.490
|
11
11
|
ds = { 'v1' => @v1, 'v2' => @v2, 'v3' => @v3 }.to_dataset
|
12
12
|
cor = Statsample::Bivariate.correlation_matrix(ds)
|
data/test/test_bivariate.rb
CHANGED
@@ -1,38 +1,38 @@
|
|
1
1
|
require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
2
2
|
class StatsampleBivariateTestCase < Minitest::Test
|
3
3
|
should 'method sum of squares should be correct' do
|
4
|
-
v1 = [1, 2, 3, 4, 5, 6].to_vector(:
|
5
|
-
v2 = [6, 2, 4, 10, 12, 8].to_vector(:
|
4
|
+
v1 = [1, 2, 3, 4, 5, 6].to_vector(:numeric)
|
5
|
+
v2 = [6, 2, 4, 10, 12, 8].to_vector(:numeric)
|
6
6
|
assert_equal(23.0, Statsample::Bivariate.sum_of_squares(v1, v2))
|
7
7
|
end
|
8
8
|
should_with_gsl 'return same covariance with ruby and gls implementation' do
|
9
|
-
v1 = 20.times.collect { |_a| rand }.
|
10
|
-
v2 = 20.times.collect { |_a| rand }.
|
9
|
+
v1 = 20.times.collect { |_a| rand }.to_numeric
|
10
|
+
v2 = 20.times.collect { |_a| rand }.to_numeric
|
11
11
|
assert_in_delta(Statsample::Bivariate.covariance(v1, v2), Statsample::Bivariate.covariance_slow(v1, v2), 0.001)
|
12
12
|
end
|
13
13
|
|
14
14
|
should_with_gsl 'return same correlation with ruby and gls implementation' do
|
15
|
-
v1 = 20.times.collect { |_a| rand }.
|
16
|
-
v2 = 20.times.collect { |_a| rand }.
|
15
|
+
v1 = 20.times.collect { |_a| rand }.to_numeric
|
16
|
+
v2 = 20.times.collect { |_a| rand }.to_numeric
|
17
17
|
|
18
18
|
assert_in_delta(GSL::Stats.correlation(v1.gsl, v2.gsl), Statsample::Bivariate.pearson_slow(v1, v2), 1e-10)
|
19
19
|
end
|
20
20
|
should 'return correct pearson correlation' do
|
21
|
-
v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:
|
22
|
-
v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:
|
21
|
+
v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:numeric)
|
22
|
+
v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:numeric)
|
23
23
|
assert_in_delta(0.525, Statsample::Bivariate.pearson(v1, v2), 0.001)
|
24
24
|
assert_in_delta(0.525, Statsample::Bivariate.pearson_slow(v1, v2), 0.001)
|
25
25
|
|
26
|
-
v3 = [6, 2, 1000, 1000, 5, 4, 7, 8, 4, 3, 2, nil].to_vector(:
|
27
|
-
v4 = [2, nil, nil, nil, 3, 7, 8, 6, 4, 3, 2, 500].to_vector(:
|
26
|
+
v3 = [6, 2, 1000, 1000, 5, 4, 7, 8, 4, 3, 2, nil].to_vector(:numeric)
|
27
|
+
v4 = [2, nil, nil, nil, 3, 7, 8, 6, 4, 3, 2, 500].to_vector(:numeric)
|
28
28
|
assert_in_delta(0.525, Statsample::Bivariate.pearson(v3, v4), 0.001)
|
29
29
|
# Test ruby method
|
30
30
|
v3a, v4a = Statsample.only_valid v3, v4
|
31
31
|
assert_in_delta(0.525, Statsample::Bivariate.pearson_slow(v3a, v4a), 0.001)
|
32
32
|
end
|
33
33
|
should 'return correct values for t_pearson and prop_pearson' do
|
34
|
-
v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:
|
35
|
-
v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:
|
34
|
+
v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:numeric)
|
35
|
+
v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:numeric)
|
36
36
|
r = Statsample::Bivariate::Pearson.new(v1, v2)
|
37
37
|
assert_in_delta(0.525, r.r, 0.001)
|
38
38
|
assert_in_delta(Statsample::Bivariate.t_pearson(v1, v2), r.t, 0.001)
|
@@ -40,10 +40,10 @@ class StatsampleBivariateTestCase < Minitest::Test
|
|
40
40
|
assert(r.summary.size > 0)
|
41
41
|
end
|
42
42
|
should 'return correct correlation_matrix with nils values' do
|
43
|
-
v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:
|
44
|
-
v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:
|
45
|
-
v3 = [6, 2, 1000, 1000, 5, 4, 7, 8].to_vector(:
|
46
|
-
v4 = [2, nil, nil, nil, 3, 7, 8, 6].to_vector(:
|
43
|
+
v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:numeric)
|
44
|
+
v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:numeric)
|
45
|
+
v3 = [6, 2, 1000, 1000, 5, 4, 7, 8].to_vector(:numeric)
|
46
|
+
v4 = [2, nil, nil, nil, 3, 7, 8, 6].to_vector(:numeric)
|
47
47
|
ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4 }.to_dataset
|
48
48
|
c = proc { |n1, n2| Statsample::Bivariate.pearson(n1, n2) }
|
49
49
|
expected = Matrix[[c.call(v1, v1), c.call(v1, v2), c.call(v1, v3), c.call(v1, v4)], [c.call(v2, v1), c.call(v2, v2), c.call(v2, v3), c.call(v2, v4)], [c.call(v3, v1), c.call(v3, v2), c.call(v3, v3), c.call(v3, v4)],
|
@@ -61,11 +61,11 @@ class StatsampleBivariateTestCase < Minitest::Test
|
|
61
61
|
end
|
62
62
|
should_with_gsl 'return same values for optimized and pairwise covariance matrix' do
|
63
63
|
cases = 100
|
64
|
-
v1 = Statsample::Vector.
|
65
|
-
v2 = Statsample::Vector.
|
66
|
-
v3 = Statsample::Vector.
|
67
|
-
v4 = Statsample::Vector.
|
68
|
-
v5 = Statsample::Vector.
|
64
|
+
v1 = Statsample::Vector.new_numeric(cases) { rand }
|
65
|
+
v2 = Statsample::Vector.new_numeric(cases) { rand }
|
66
|
+
v3 = Statsample::Vector.new_numeric(cases) { rand }
|
67
|
+
v4 = Statsample::Vector.new_numeric(cases) { rand }
|
68
|
+
v5 = Statsample::Vector.new_numeric(cases) { rand }
|
69
69
|
|
70
70
|
ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4, 'v5' => v5 }.to_dataset
|
71
71
|
|
@@ -76,11 +76,11 @@ class StatsampleBivariateTestCase < Minitest::Test
|
|
76
76
|
end
|
77
77
|
should_with_gsl 'return same values for optimized and pairwise correlation matrix' do
|
78
78
|
cases = 100
|
79
|
-
v1 = Statsample::Vector.
|
80
|
-
v2 = Statsample::Vector.
|
81
|
-
v3 = Statsample::Vector.
|
82
|
-
v4 = Statsample::Vector.
|
83
|
-
v5 = Statsample::Vector.
|
79
|
+
v1 = Statsample::Vector.new_numeric(cases) { rand }
|
80
|
+
v2 = Statsample::Vector.new_numeric(cases) { rand }
|
81
|
+
v3 = Statsample::Vector.new_numeric(cases) { rand }
|
82
|
+
v4 = Statsample::Vector.new_numeric(cases) { rand }
|
83
|
+
v5 = Statsample::Vector.new_numeric(cases) { rand }
|
84
84
|
|
85
85
|
ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4, 'v5' => v5 }.to_dataset
|
86
86
|
|
@@ -90,10 +90,10 @@ class StatsampleBivariateTestCase < Minitest::Test
|
|
90
90
|
assert_equal_matrix(cor_opt, cor_pw, 1e-15)
|
91
91
|
end
|
92
92
|
should 'return correct correlation_matrix without nils values' do
|
93
|
-
v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:
|
94
|
-
v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:
|
95
|
-
v3 = [6, 2, 1000, 1000, 5, 4, 7, 8].to_vector(:
|
96
|
-
v4 = [2, 4, 6, 7, 3, 7, 8, 6].to_vector(:
|
93
|
+
v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:numeric)
|
94
|
+
v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:numeric)
|
95
|
+
v3 = [6, 2, 1000, 1000, 5, 4, 7, 8].to_vector(:numeric)
|
96
|
+
v4 = [2, 4, 6, 7, 3, 7, 8, 6].to_vector(:numeric)
|
97
97
|
ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4 }.to_dataset
|
98
98
|
c = proc { |n1, n2| Statsample::Bivariate.pearson(n1, n2) }
|
99
99
|
expected = Matrix[[c.call(v1, v1), c.call(v1, v2), c.call(v1, v3), c.call(v1, v4)], [c.call(v2, v1), c.call(v2, v2), c.call(v2, v3), c.call(v2, v4)], [c.call(v3, v1), c.call(v3, v2), c.call(v3, v3), c.call(v3, v4)],
|
@@ -129,25 +129,25 @@ class StatsampleBivariateTestCase < Minitest::Test
|
|
129
129
|
end
|
130
130
|
|
131
131
|
should "return correct value for Spearman's rho" do
|
132
|
-
v1 = [86, 97, 99, 100, 101, 103, 106, 110, 112, 113].to_vector(:
|
133
|
-
v2 = [0, 20, 28, 27, 50, 29, 7, 17, 6, 12].to_vector(:
|
132
|
+
v1 = [86, 97, 99, 100, 101, 103, 106, 110, 112, 113].to_vector(:numeric)
|
133
|
+
v2 = [0, 20, 28, 27, 50, 29, 7, 17, 6, 12].to_vector(:numeric)
|
134
134
|
assert_in_delta(-0.175758, Statsample::Bivariate.spearman(v1, v2), 0.0001)
|
135
135
|
end
|
136
136
|
should 'return correct value for point_biserial correlation' do
|
137
|
-
c = [1, 3, 5, 6, 7, 100, 200, 300, 400, 300].to_vector(:
|
138
|
-
d = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0].to_vector(:
|
137
|
+
c = [1, 3, 5, 6, 7, 100, 200, 300, 400, 300].to_vector(:numeric)
|
138
|
+
d = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0].to_vector(:numeric)
|
139
139
|
assert_raises TypeError do
|
140
140
|
Statsample::Bivariate.point_biserial(c, d)
|
141
141
|
end
|
142
142
|
assert_in_delta(Statsample::Bivariate.point_biserial(d, c), Statsample::Bivariate.pearson(d, c), 0.0001)
|
143
143
|
end
|
144
144
|
should 'return correct value for tau_a and tau_b' do
|
145
|
-
v1 = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11].to_vector(:
|
146
|
-
v2 = [1, 3, 4, 5, 7, 8, 2, 9, 10, 6, 11].to_vector(:
|
145
|
+
v1 = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11].to_vector(:numeric)
|
146
|
+
v2 = [1, 3, 4, 5, 7, 8, 2, 9, 10, 6, 11].to_vector(:numeric)
|
147
147
|
assert_in_delta(0.6727, Statsample::Bivariate.tau_a(v1, v2), 0.001)
|
148
148
|
assert_in_delta(0.6727, Statsample::Bivariate.tau_b((Statsample::Crosstab.new(v1, v2).to_matrix)), 0.001)
|
149
|
-
v1 = [12, 14, 14, 17, 19, 19, 19, 19, 19, 20, 21, 21, 21, 21, 21, 22, 23, 24, 24, 24, 26, 26, 27].to_vector(:
|
150
|
-
v2 = [11, 4, 4, 2, 0, 0, 0, 0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0].to_vector(:
|
149
|
+
v1 = [12, 14, 14, 17, 19, 19, 19, 19, 19, 20, 21, 21, 21, 21, 21, 22, 23, 24, 24, 24, 26, 26, 27].to_vector(:numeric)
|
150
|
+
v2 = [11, 4, 4, 2, 0, 0, 0, 0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0].to_vector(:numeric)
|
151
151
|
assert_in_delta(-0.376201540231705, Statsample::Bivariate.tau_b(Statsample::Crosstab.new(v1, v2).to_matrix), 0.001)
|
152
152
|
end
|
153
153
|
should 'return correct value for gamma correlation' do
|
data/test/test_crosstab.rb
CHANGED
@@ -58,8 +58,8 @@ class StatsampleCrosstabTestCase < Minitest::Test
|
|
58
58
|
end
|
59
59
|
|
60
60
|
def test_crosstab_with_scale
|
61
|
-
v1 = %w(1 1 1 1 1 0 0 0 0 0).
|
62
|
-
v2 = %w(0 0 0 0 0 1 1 1 1 1).
|
61
|
+
v1 = %w(1 1 1 1 1 0 0 0 0 0).to_numeric
|
62
|
+
v2 = %w(0 0 0 0 0 1 1 1 1 1).to_numeric
|
63
63
|
ct = Statsample::Crosstab.new(v1, v2)
|
64
64
|
assert_equal(Matrix[[0, 5], [5, 0]], ct.to_matrix)
|
65
65
|
assert_nothing_raised { ct.summary }
|
data/test/test_csv.rb
CHANGED
@@ -8,11 +8,11 @@ class StatsampleCSVTestCase < Minitest::Test
|
|
8
8
|
def test_read
|
9
9
|
header = %w(id name age city a1)
|
10
10
|
data = {
|
11
|
-
'id' => [1, 2, 3, 4, 5, 6].to_vector(:
|
12
|
-
'name' => %w(Alex Claude Peter Franz George Fernand).to_vector(:
|
13
|
-
'age' => [20, 23, 25, 27, 5.5, nil].to_vector(:
|
14
|
-
'city' => ['New York', 'London', 'London', 'Paris', 'Tome', nil].to_vector(:
|
15
|
-
'a1' => ['a,b', 'b,c', 'a', nil, 'a,b,c', nil].to_vector(:
|
11
|
+
'id' => [1, 2, 3, 4, 5, 6].to_vector(:numeric),
|
12
|
+
'name' => %w(Alex Claude Peter Franz George Fernand).to_vector(:object),
|
13
|
+
'age' => [20, 23, 25, 27, 5.5, nil].to_vector(:numeric),
|
14
|
+
'city' => ['New York', 'London', 'London', 'Paris', 'Tome', nil].to_vector(:object),
|
15
|
+
'a1' => ['a,b', 'b,c', 'a', nil, 'a,b,c', nil].to_vector(:object)
|
16
16
|
}
|
17
17
|
|
18
18
|
ds_exp = Statsample::Dataset.new(data, header)
|
@@ -34,7 +34,7 @@ class StatsampleCSVTestCase < Minitest::Test
|
|
34
34
|
def test_repeated
|
35
35
|
ds = Statsample::CSV.read('test/fixtures/repeated_fields.csv')
|
36
36
|
assert_equal(%w(id name_1 age_1 city a1 name_2 age_2), ds.fields)
|
37
|
-
age = [3, 4, 5, 6, nil, 8].to_vector(:
|
37
|
+
age = [3, 4, 5, 6, nil, 8].to_vector(:numeric)
|
38
38
|
assert_equal(age, ds['age_2'])
|
39
39
|
end
|
40
40
|
|
data/test/test_dataset.rb
CHANGED
@@ -58,9 +58,9 @@ class StatsampleDatasetTestCase < Minitest::Test
|
|
58
58
|
end
|
59
59
|
|
60
60
|
def test_merge
|
61
|
-
a = [1, 2, 3].
|
61
|
+
a = [1, 2, 3].to_numeric
|
62
62
|
b = [3, 4, 5].to_vector
|
63
|
-
c = [4, 5, 6].
|
63
|
+
c = [4, 5, 6].to_numeric
|
64
64
|
d = [7, 8, 9].to_vector
|
65
65
|
e = [10, 20, 30].to_vector
|
66
66
|
ds1 = { 'a' => a, 'b' => b }.to_dataset
|
@@ -117,51 +117,51 @@ class StatsampleDatasetTestCase < Minitest::Test
|
|
117
117
|
end
|
118
118
|
|
119
119
|
def test_vector_by_calculation
|
120
|
-
a1 = [1, 2, 3, 4, 5, 6, 7].to_vector(:
|
121
|
-
a2 = [10, 20, 30, 40, 50, 60, 70].to_vector(:
|
122
|
-
a3 = [100, 200, 300, 400, 500, 600, 700].to_vector(:
|
120
|
+
a1 = [1, 2, 3, 4, 5, 6, 7].to_vector(:numeric)
|
121
|
+
a2 = [10, 20, 30, 40, 50, 60, 70].to_vector(:numeric)
|
122
|
+
a3 = [100, 200, 300, 400, 500, 600, 700].to_vector(:numeric)
|
123
123
|
ds = { 'a1' => a1, 'a2' => a2, 'a3' => a3 }.to_dataset
|
124
124
|
total = ds.vector_by_calculation {|row|
|
125
125
|
row['a1'] + row['a2'] + row['a3']
|
126
126
|
}
|
127
|
-
expected = [111, 222, 333, 444, 555, 666, 777].to_vector(:
|
127
|
+
expected = [111, 222, 333, 444, 555, 666, 777].to_vector(:numeric)
|
128
128
|
assert_equal(expected, total)
|
129
129
|
end
|
130
130
|
|
131
131
|
def test_vector_sum
|
132
|
-
a1 = [1, 2, 3, 4, 5, nil].to_vector(:
|
133
|
-
a2 = [10, 10, 20, 20, 20, 30].to_vector(:
|
134
|
-
b1 = [nil, 1, 1, 1, 1, 2].to_vector(:
|
135
|
-
b2 = [2, 2, 2, nil, 2, 3].to_vector(:
|
132
|
+
a1 = [1, 2, 3, 4, 5, nil].to_vector(:numeric)
|
133
|
+
a2 = [10, 10, 20, 20, 20, 30].to_vector(:numeric)
|
134
|
+
b1 = [nil, 1, 1, 1, 1, 2].to_vector(:numeric)
|
135
|
+
b2 = [2, 2, 2, nil, 2, 3].to_vector(:numeric)
|
136
136
|
ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2 }.to_dataset
|
137
137
|
total = ds.vector_sum
|
138
138
|
a = ds.vector_sum(%w(a1 a2))
|
139
139
|
b = ds.vector_sum(%w(b1 b2))
|
140
|
-
expected_a = [11, 12, 23, 24, 25, nil].to_vector(:
|
141
|
-
expected_b = [nil, 3, 3, nil, 3, 5].to_vector(:
|
142
|
-
expected_total = [nil, 15, 26, nil, 28, nil].to_vector(:
|
140
|
+
expected_a = [11, 12, 23, 24, 25, nil].to_vector(:numeric)
|
141
|
+
expected_b = [nil, 3, 3, nil, 3, 5].to_vector(:numeric)
|
142
|
+
expected_total = [nil, 15, 26, nil, 28, nil].to_vector(:numeric)
|
143
143
|
assert_equal(expected_a, a)
|
144
144
|
assert_equal(expected_b, b)
|
145
145
|
assert_equal(expected_total, total)
|
146
146
|
end
|
147
147
|
|
148
148
|
def test_vector_missing_values
|
149
|
-
a1 = [1, nil, 3, 4, 5, nil].to_vector(:
|
150
|
-
a2 = [10, nil, 20, 20, 20, 30].to_vector(:
|
151
|
-
b1 = [nil, nil, 1, 1, 1, 2].to_vector(:
|
152
|
-
b2 = [2, 2, 2, nil, 2, 3].to_vector(:
|
153
|
-
c = [nil, 2, 4, 2, 2, 2].to_vector(:
|
149
|
+
a1 = [1, nil, 3, 4, 5, nil].to_vector(:numeric)
|
150
|
+
a2 = [10, nil, 20, 20, 20, 30].to_vector(:numeric)
|
151
|
+
b1 = [nil, nil, 1, 1, 1, 2].to_vector(:numeric)
|
152
|
+
b2 = [2, 2, 2, nil, 2, 3].to_vector(:numeric)
|
153
|
+
c = [nil, 2, 4, 2, 2, 2].to_vector(:numeric)
|
154
154
|
ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
|
155
|
-
mva = [2, 3, 0, 1, 0, 1].to_vector(:
|
155
|
+
mva = [2, 3, 0, 1, 0, 1].to_vector(:numeric)
|
156
156
|
assert_equal(mva, ds.vector_missing_values)
|
157
157
|
end
|
158
158
|
|
159
159
|
def test_has_missing_values
|
160
|
-
a1 = [1, nil, 3, 4, 5, nil].to_vector(:
|
161
|
-
a2 = [10, nil, 20, 20, 20, 30].to_vector(:
|
162
|
-
b1 = [nil, nil, 1, 1, 1, 2].to_vector(:
|
163
|
-
b2 = [2, 2, 2, nil, 2, 3].to_vector(:
|
164
|
-
c = [nil, 2, 4, 2, 2, 2].to_vector(:
|
160
|
+
a1 = [1, nil, 3, 4, 5, nil].to_vector(:numeric)
|
161
|
+
a2 = [10, nil, 20, 20, 20, 30].to_vector(:numeric)
|
162
|
+
b1 = [nil, nil, 1, 1, 1, 2].to_vector(:numeric)
|
163
|
+
b2 = [2, 2, 2, nil, 2, 3].to_vector(:numeric)
|
164
|
+
c = [nil, 2, 4, 2, 2, 2].to_vector(:numeric)
|
165
165
|
ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
|
166
166
|
assert(ds.has_missing_data?)
|
167
167
|
clean = ds.dup_only_valid
|
@@ -169,31 +169,31 @@ class StatsampleDatasetTestCase < Minitest::Test
|
|
169
169
|
end
|
170
170
|
|
171
171
|
def test_vector_count_characters
|
172
|
-
a1 = [1, 'abcde', 3, 4, 5, nil].to_vector(:
|
173
|
-
a2 = [10, 20.3, 20, 20, 20, 30].to_vector(:
|
174
|
-
b1 = [nil, '343434', 1, 1, 1, 2].to_vector(:
|
175
|
-
b2 = [2, 2, 2, nil, 2, 3].to_vector(:
|
176
|
-
c = [nil, 2, 'This is a nice example', 2, 2, 2].to_vector(:
|
172
|
+
a1 = [1, 'abcde', 3, 4, 5, nil].to_vector(:numeric)
|
173
|
+
a2 = [10, 20.3, 20, 20, 20, 30].to_vector(:numeric)
|
174
|
+
b1 = [nil, '343434', 1, 1, 1, 2].to_vector(:numeric)
|
175
|
+
b2 = [2, 2, 2, nil, 2, 3].to_vector(:numeric)
|
176
|
+
c = [nil, 2, 'This is a nice example', 2, 2, 2].to_vector(:numeric)
|
177
177
|
ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
|
178
|
-
exp = [4, 17, 27, 5, 6, 5].to_vector(:
|
178
|
+
exp = [4, 17, 27, 5, 6, 5].to_vector(:numeric)
|
179
179
|
assert_equal(exp, ds.vector_count_characters)
|
180
180
|
end
|
181
181
|
|
182
182
|
def test_vector_mean
|
183
|
-
a1 = [1, 2, 3, 4, 5, nil].to_vector(:
|
184
|
-
a2 = [10, 10, 20, 20, 20, 30].to_vector(:
|
185
|
-
b1 = [nil, 1, 1, 1, 1, 2].to_vector(:
|
186
|
-
b2 = [2, 2, 2, nil, 2, 3].to_vector(:
|
187
|
-
c = [nil, 2, 4, 2, 2, 2].to_vector(:
|
183
|
+
a1 = [1, 2, 3, 4, 5, nil].to_vector(:numeric)
|
184
|
+
a2 = [10, 10, 20, 20, 20, 30].to_vector(:numeric)
|
185
|
+
b1 = [nil, 1, 1, 1, 1, 2].to_vector(:numeric)
|
186
|
+
b2 = [2, 2, 2, nil, 2, 3].to_vector(:numeric)
|
187
|
+
c = [nil, 2, 4, 2, 2, 2].to_vector(:numeric)
|
188
188
|
ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
|
189
189
|
total = ds.vector_mean
|
190
190
|
a = ds.vector_mean(%w(a1 a2), 1)
|
191
191
|
b = ds.vector_mean(%w(b1 b2), 1)
|
192
192
|
c = ds.vector_mean(%w(b1 b2 c), 1)
|
193
|
-
expected_a = [5.5, 6, 11.5, 12, 12.5, 30].to_vector(:
|
194
|
-
expected_b = [2, 1.5, 1.5, 1, 1.5, 2.5].to_vector(:
|
195
|
-
expected_c = [nil, 5.0 / 3, 7.0 / 3, 1.5, 5.0 / 3, 7.0 / 3].to_vector(:
|
196
|
-
expected_total = [nil, 3.4, 6, nil, 6.0, nil].to_vector(:
|
193
|
+
expected_a = [5.5, 6, 11.5, 12, 12.5, 30].to_vector(:numeric)
|
194
|
+
expected_b = [2, 1.5, 1.5, 1, 1.5, 2.5].to_vector(:numeric)
|
195
|
+
expected_c = [nil, 5.0 / 3, 7.0 / 3, 1.5, 5.0 / 3, 7.0 / 3].to_vector(:numeric)
|
196
|
+
expected_total = [nil, 3.4, 6, nil, 6.0, nil].to_vector(:numeric)
|
197
197
|
assert_equal(expected_a, a)
|
198
198
|
assert_equal(expected_b, b)
|
199
199
|
assert_equal(expected_c, c)
|
@@ -210,9 +210,9 @@ class StatsampleDatasetTestCase < Minitest::Test
|
|
210
210
|
end
|
211
211
|
|
212
212
|
def test_recode
|
213
|
-
@ds['age'].type = :
|
213
|
+
@ds['age'].type = :numeric
|
214
214
|
@ds.recode!('age') { |c| c['id'] * 2 }
|
215
|
-
expected = [2, 4, 6, 8, 10].to_vector(:
|
215
|
+
expected = [2, 4, 6, 8, 10].to_vector(:numeric)
|
216
216
|
assert_equal(expected, @ds['age'])
|
217
217
|
end
|
218
218
|
|
@@ -231,8 +231,8 @@ class StatsampleDatasetTestCase < Minitest::Test
|
|
231
231
|
end
|
232
232
|
|
233
233
|
def test_change_type
|
234
|
-
@ds.col('age').type = :
|
235
|
-
assert_equal(:
|
234
|
+
@ds.col('age').type = :numeric
|
235
|
+
assert_equal(:numeric, @ds.col('age').type)
|
236
236
|
end
|
237
237
|
|
238
238
|
def test_split_by_separator_recode
|
@@ -255,13 +255,13 @@ class StatsampleDatasetTestCase < Minitest::Test
|
|
255
255
|
end
|
256
256
|
|
257
257
|
def test_percentiles
|
258
|
-
v1 = (1..100).to_a.
|
258
|
+
v1 = (1..100).to_a.to_numeric
|
259
259
|
assert_equal(50.5, v1.median)
|
260
260
|
assert_equal(25.5, v1.percentil(25))
|
261
|
-
v2 = (1..99).to_a.
|
261
|
+
v2 = (1..99).to_a.to_numeric
|
262
262
|
assert_equal(50, v2.median)
|
263
263
|
assert_equal(25, v2.percentil(25))
|
264
|
-
v3 = (1..50).to_a.
|
264
|
+
v3 = (1..50).to_a.to_numeric
|
265
265
|
assert_equal(25.5, v3.median)
|
266
266
|
assert_equal(13, v3.percentil(25))
|
267
267
|
end
|
@@ -336,7 +336,7 @@ class StatsampleDatasetTestCase < Minitest::Test
|
|
336
336
|
|
337
337
|
assert_equal(ds1.fields, ds2.fields)
|
338
338
|
assert_not_same(ds1.fields, ds2.fields)
|
339
|
-
ds1['v1'].type = :
|
339
|
+
ds1['v1'].type = :numeric
|
340
340
|
# dup partial
|
341
341
|
ds3 = ds1.dup('v1')
|
342
342
|
ds_exp = Statsample::Dataset.new({ 'v1' => v1 }, %w(v1))
|
@@ -355,7 +355,7 @@ class StatsampleDatasetTestCase < Minitest::Test
|
|
355
355
|
assert_not_equal(ds1['v1'], ds3['v1'])
|
356
356
|
assert_equal([], ds3['v1'].data)
|
357
357
|
assert_equal([], ds3['v2'].data)
|
358
|
-
assert_equal(:
|
358
|
+
assert_equal(:numeric, ds3['v1'].type)
|
359
359
|
assert_equal(ds1.fields, ds2.fields)
|
360
360
|
assert_not_same(ds1.fields, ds2.fields)
|
361
361
|
end
|
@@ -368,9 +368,9 @@ class StatsampleDatasetTestCase < Minitest::Test
|
|
368
368
|
end
|
369
369
|
|
370
370
|
def test_each_array_with_nils
|
371
|
-
v1 = [1, -99, 3, 4, 'na'].to_vector(:
|
372
|
-
v2 = [5, 6, -99, 8, 20].to_vector(:
|
373
|
-
v3 = [9, 10, 11, 12, 20].to_vector(:
|
371
|
+
v1 = [1, -99, 3, 4, 'na'].to_vector(:numeric, missing_values: [-99, 'na'])
|
372
|
+
v2 = [5, 6, -99, 8, 20].to_vector(:numeric, missing_values: [-99])
|
373
|
+
v3 = [9, 10, 11, 12, 20].to_vector(:numeric, missing_values: [-99])
|
374
374
|
ds1 = Statsample::Dataset.new('v1' => v1, 'v2' => v2, 'v3' => v3)
|
375
375
|
ds2 = ds1.dup_empty
|
376
376
|
ds1.each_array_with_nils {|row|
|
@@ -382,40 +382,40 @@ class StatsampleDatasetTestCase < Minitest::Test
|
|
382
382
|
end
|
383
383
|
|
384
384
|
def test_dup_only_valid
|
385
|
-
v1 = [1, nil, 3, 4].to_vector(:
|
386
|
-
v2 = [5, 6, nil, 8].to_vector(:
|
387
|
-
v3 = [9, 10, 11, 12].to_vector(:
|
385
|
+
v1 = [1, nil, 3, 4].to_vector(:numeric)
|
386
|
+
v2 = [5, 6, nil, 8].to_vector(:numeric)
|
387
|
+
v3 = [9, 10, 11, 12].to_vector(:numeric)
|
388
388
|
ds1 = Statsample::Dataset.new('v1' => v1, 'v2' => v2, 'v3' => v3)
|
389
389
|
ds2 = ds1.dup_only_valid
|
390
|
-
expected = Statsample::Dataset.new('v1' => [1, 4].to_vector(:
|
390
|
+
expected = Statsample::Dataset.new('v1' => [1, 4].to_vector(:numeric), 'v2' => [5, 8].to_vector(:numeric), 'v3' => [9, 12].to_vector(:numeric))
|
391
391
|
assert_equal(expected, ds2)
|
392
392
|
assert_equal(expected.vectors.values, Statsample.only_valid(v1, v2, v3))
|
393
|
-
expected_partial = Statsample::Dataset.new('v1' => [1, 3, 4].to_vector(:
|
393
|
+
expected_partial = Statsample::Dataset.new('v1' => [1, 3, 4].to_vector(:numeric), 'v3' => [9, 11, 12].to_vector(:numeric))
|
394
394
|
assert_equal(expected_partial, ds1.dup_only_valid(%w(v1 v3)))
|
395
395
|
end
|
396
396
|
|
397
397
|
def test_filter
|
398
|
-
@ds['age'].type = :
|
398
|
+
@ds['age'].type = :numeric
|
399
399
|
filtered = @ds.filter { |c| c['id'] == 2 or c['id'] == 4 }
|
400
|
-
expected = Statsample::Dataset.new({ 'id' => Statsample::Vector.new([2, 4]), 'name' => Statsample::Vector.new(%w(Claude Franz)), 'age' => Statsample::Vector.new([23, 27], :
|
400
|
+
expected = Statsample::Dataset.new({ 'id' => Statsample::Vector.new([2, 4]), 'name' => Statsample::Vector.new(%w(Claude Franz)), 'age' => Statsample::Vector.new([23, 27], :numeric),
|
401
401
|
'city' => Statsample::Vector.new(%w(London Paris)),
|
402
402
|
'a1' => Statsample::Vector.new(['b,c', nil]) }, %w(id name age city a1))
|
403
403
|
assert_equal(expected, filtered)
|
404
404
|
end
|
405
405
|
|
406
406
|
def test_filter_field
|
407
|
-
@ds['age'].type = :
|
407
|
+
@ds['age'].type = :numeric
|
408
408
|
filtered = @ds.filter_field('id') { |c| c['id'] == 2 or c['id'] == 4 }
|
409
409
|
expected = [2, 4].to_vector
|
410
410
|
assert_equal(expected, filtered)
|
411
411
|
end
|
412
412
|
|
413
413
|
def test_verify
|
414
|
-
name = %w(r1 r2 r3 r4).to_vector(:
|
415
|
-
v1 = [1, 2, 3, 4].to_vector(:
|
416
|
-
v2 = [4, 3, 2, 1].to_vector(:
|
417
|
-
v3 = [10, 20, 30, 40].to_vector(:
|
418
|
-
v4 = %w(a b a b).to_vector(:
|
414
|
+
name = %w(r1 r2 r3 r4).to_vector(:object)
|
415
|
+
v1 = [1, 2, 3, 4].to_vector(:numeric)
|
416
|
+
v2 = [4, 3, 2, 1].to_vector(:numeric)
|
417
|
+
v3 = [10, 20, 30, 40].to_vector(:numeric)
|
418
|
+
v4 = %w(a b a b).to_vector(:object)
|
419
419
|
ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4, 'id' => name }.to_dataset
|
420
420
|
ds.fields = %w(v1 v2 v3 v4 id)
|
421
421
|
# Correct
|
@@ -432,14 +432,14 @@ class StatsampleDatasetTestCase < Minitest::Test
|
|
432
432
|
end
|
433
433
|
|
434
434
|
def test_compute_operation
|
435
|
-
v1 = [1, 2, 3, 4].to_vector(:
|
436
|
-
v2 = [4, 3, 2, 1].to_vector(:
|
437
|
-
v3 = [10, 20, 30, 40].to_vector(:
|
438
|
-
|
439
|
-
vsum = [1 + 4 + 10.0, 2 + 3 + 20.0, 3 + 2 + 30.0, 4 + 1 + 40.0].to_vector(:
|
440
|
-
vmult = [1 * 4, 2 * 3, 3 * 2, 4 * 1].to_vector(:
|
435
|
+
v1 = [1, 2, 3, 4].to_vector(:numeric)
|
436
|
+
v2 = [4, 3, 2, 1].to_vector(:numeric)
|
437
|
+
v3 = [10, 20, 30, 40].to_vector(:numeric)
|
438
|
+
vnumeric = [1.quo(2), 1, 3.quo(2), 2].to_vector(:numeric)
|
439
|
+
vsum = [1 + 4 + 10.0, 2 + 3 + 20.0, 3 + 2 + 30.0, 4 + 1 + 40.0].to_vector(:numeric)
|
440
|
+
vmult = [1 * 4, 2 * 3, 3 * 2, 4 * 1].to_vector(:numeric)
|
441
441
|
ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3 }.to_dataset
|
442
|
-
assert_equal(
|
442
|
+
assert_equal(vnumeric, ds.compute('v1/2'))
|
443
443
|
assert_equal(vsum, ds.compute('v1+v2+v3'))
|
444
444
|
assert_equal(vmult, ds.compute('v1*v2'))
|
445
445
|
end
|
@@ -447,15 +447,15 @@ class StatsampleDatasetTestCase < Minitest::Test
|
|
447
447
|
def test_crosstab_with_asignation
|
448
448
|
v1 = %w(a a a b b b c c c).to_vector
|
449
449
|
v2 = %w(a b c a b c a b c).to_vector
|
450
|
-
v3 = %w(0 1 0 0 1 1 0 0 1).
|
450
|
+
v3 = %w(0 1 0 0 1 1 0 0 1).to_numeric
|
451
451
|
ds = Statsample::Dataset.crosstab_by_asignation(v1, v2, v3)
|
452
|
-
assert_equal(:
|
453
|
-
assert_equal(:
|
454
|
-
assert_equal(:
|
452
|
+
assert_equal(:object, ds['_id'].type)
|
453
|
+
assert_equal(:numeric, ds['a'].type)
|
454
|
+
assert_equal(:numeric, ds['b'].type)
|
455
455
|
ev_id = %w(a b c).to_vector
|
456
|
-
ev_a = %w(0 0 0).
|
457
|
-
ev_b = %w(1 1 0).
|
458
|
-
ev_c = %w(0 1 1).
|
456
|
+
ev_a = %w(0 0 0).to_numeric
|
457
|
+
ev_b = %w(1 1 0).to_numeric
|
458
|
+
ev_c = %w(0 1 1).to_numeric
|
459
459
|
ds2 = { '_id' => ev_id, 'a' => ev_a, 'b' => ev_b, 'c' => ev_c }.to_dataset
|
460
460
|
assert_equal(ds, ds2)
|
461
461
|
end
|
@@ -472,7 +472,7 @@ class StatsampleDatasetTestCase < Minitest::Test
|
|
472
472
|
ids = %w(1 1 2 2 2).to_vector
|
473
473
|
colors = %w(red blue green orange white).to_vector
|
474
474
|
values = [10, 20, 15, 30, 20].to_vector
|
475
|
-
col_ids = [1, 2, 1, 2, 3].
|
475
|
+
col_ids = [1, 2, 1, 2, 3].to_numeric
|
476
476
|
ds_expected = { 'id' => ids, '_col_id' => col_ids, 'color' => colors, 'value' => values }.to_dataset(%w(id _col_id color value))
|
477
477
|
assert_equal(ds_expected, ds.one_to_many(%w(id), 'car_%v%n'))
|
478
478
|
end
|