statsample 1.5.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.build.sh +15 -0
- data/.gitignore +1 -0
- data/.travis.yml +19 -7
- data/CONTRIBUTING.md +33 -0
- data/History.txt +5 -0
- data/README.md +41 -53
- data/benchmarks/correlation_matrix_15_variables.rb +6 -5
- data/benchmarks/correlation_matrix_5_variables.rb +6 -5
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +23 -26
- data/examples/boxplot.rb +17 -5
- data/examples/correlation_matrix.rb +36 -7
- data/examples/dataset.rb +25 -5
- data/examples/dominance_analysis.rb +8 -7
- data/examples/dominance_analysis_bootstrap.rb +16 -11
- data/examples/histogram.rb +16 -2
- data/examples/icc.rb +5 -6
- data/examples/levene.rb +17 -3
- data/examples/multiple_regression.rb +6 -3
- data/examples/parallel_analysis.rb +11 -6
- data/examples/polychoric.rb +26 -13
- data/examples/principal_axis.rb +8 -4
- data/examples/reliability.rb +10 -10
- data/examples/scatterplot.rb +8 -0
- data/examples/t_test.rb +7 -0
- data/examples/u_test.rb +10 -2
- data/examples/vector.rb +9 -6
- data/examples/velicer_map_test.rb +12 -8
- data/lib/statsample.rb +13 -47
- data/lib/statsample/analysis/suite.rb +1 -1
- data/lib/statsample/anova/oneway.rb +6 -6
- data/lib/statsample/anova/twoway.rb +26 -24
- data/lib/statsample/bivariate.rb +78 -61
- data/lib/statsample/bivariate/pearson.rb +2 -2
- data/lib/statsample/codification.rb +45 -32
- data/lib/statsample/converter/csv.rb +15 -53
- data/lib/statsample/converter/spss.rb +6 -5
- data/lib/statsample/converters.rb +50 -211
- data/lib/statsample/crosstab.rb +26 -25
- data/lib/statsample/daru.rb +117 -0
- data/lib/statsample/dataset.rb +70 -942
- data/lib/statsample/dominanceanalysis.rb +16 -17
- data/lib/statsample/dominanceanalysis/bootstrap.rb +26 -28
- data/lib/statsample/factor/parallelanalysis.rb +17 -19
- data/lib/statsample/factor/pca.rb +21 -20
- data/lib/statsample/factor/principalaxis.rb +3 -3
- data/lib/statsample/graph/boxplot.rb +8 -16
- data/lib/statsample/graph/histogram.rb +4 -4
- data/lib/statsample/graph/scatterplot.rb +8 -7
- data/lib/statsample/histogram.rb +128 -119
- data/lib/statsample/matrix.rb +20 -16
- data/lib/statsample/multiset.rb +39 -38
- data/lib/statsample/regression.rb +3 -3
- data/lib/statsample/regression/multiple.rb +8 -10
- data/lib/statsample/regression/multiple/alglibengine.rb +96 -89
- data/lib/statsample/regression/multiple/baseengine.rb +32 -32
- data/lib/statsample/regression/multiple/gslengine.rb +33 -36
- data/lib/statsample/regression/multiple/matrixengine.rb +7 -9
- data/lib/statsample/regression/multiple/rubyengine.rb +39 -41
- data/lib/statsample/reliability.rb +23 -25
- data/lib/statsample/reliability/icc.rb +8 -7
- data/lib/statsample/reliability/multiscaleanalysis.rb +14 -12
- data/lib/statsample/reliability/scaleanalysis.rb +58 -60
- data/lib/statsample/reliability/skillscaleanalysis.rb +34 -29
- data/lib/statsample/resample.rb +1 -1
- data/lib/statsample/shorthand.rb +29 -25
- data/lib/statsample/test/kolmogorovsmirnov.rb +5 -3
- data/lib/statsample/test/levene.rb +28 -27
- data/lib/statsample/test/t.rb +7 -9
- data/lib/statsample/test/umannwhitney.rb +28 -28
- data/lib/statsample/test/wilcoxonsignedrank.rb +45 -43
- data/lib/statsample/vector.rb +70 -1013
- data/lib/statsample/version.rb +1 -1
- data/statsample.gemspec +12 -16
- data/test/helpers_tests.rb +1 -1
- data/test/test_analysis.rb +17 -17
- data/test/test_anova_contrast.rb +6 -6
- data/test/test_anovatwowaywithdataset.rb +8 -8
- data/test/test_anovawithvectors.rb +8 -8
- data/test/test_awesome_print_bug.rb +1 -1
- data/test/test_bartlettsphericity.rb +4 -4
- data/test/test_bivariate.rb +48 -43
- data/test/test_codification.rb +33 -33
- data/test/test_crosstab.rb +9 -9
- data/test/test_dataset.rb +28 -458
- data/test/test_factor.rb +46 -38
- data/test/test_factor_pa.rb +22 -13
- data/test/test_ggobi.rb +4 -4
- data/test/test_gsl.rb +4 -4
- data/test/test_histogram.rb +3 -3
- data/test/test_matrix.rb +13 -13
- data/test/test_multiset.rb +103 -91
- data/test/test_regression.rb +57 -52
- data/test/test_reliability.rb +55 -45
- data/test/test_reliability_icc.rb +8 -8
- data/test/test_reliability_skillscale.rb +26 -24
- data/test/test_resample.rb +1 -1
- data/test/test_statistics.rb +3 -13
- data/test/test_stest.rb +9 -9
- data/test/test_stratified.rb +3 -3
- data/test/test_test_t.rb +12 -12
- data/test/test_umannwhitney.rb +2 -2
- data/test/test_vector.rb +76 -613
- data/test/test_wilcoxonsignedrank.rb +4 -4
- metadata +57 -28
- data/lib/statsample/rserve_extension.rb +0 -20
- data/lib/statsample/vector/gsl.rb +0 -106
- data/test/fixtures/repeated_fields.csv +0 -7
- data/test/fixtures/scientific_notation.csv +0 -4
- data/test/fixtures/test_csv.csv +0 -7
- data/test/fixtures/test_xls.xls +0 -0
- data/test/test_csv.rb +0 -63
- data/test/test_rserve_extension.rb +0 -42
- data/test/test_xls.rb +0 -52
data/lib/statsample/version.rb
CHANGED
data/statsample.gemspec
CHANGED
@@ -3,7 +3,7 @@ $:.unshift File.expand_path("../lib/", __FILE__)
|
|
3
3
|
require 'statsample/version'
|
4
4
|
require 'date'
|
5
5
|
|
6
|
-
DESCRIPTION = <<MSG
|
6
|
+
Statsample::DESCRIPTION = <<MSG
|
7
7
|
A suite for basic and advanced statistics on Ruby. Tested on CRuby 1.9.3, 2.0.0
|
8
8
|
and 2.1.1. See `.travis.yml` for more information.
|
9
9
|
|
@@ -11,7 +11,6 @@ Include:
|
|
11
11
|
|
12
12
|
- Descriptive statistics: frequencies, median, mean,
|
13
13
|
standard error, skew, kurtosis (and many others).
|
14
|
-
- Imports and exports datasets from and to Excel, CSV and plain text files.
|
15
14
|
- Correlations: Pearson's r, Spearman's rank correlation (rho), point biserial,
|
16
15
|
tau a, tau b and gamma. Tetrachoric and Polychoric correlation provides by
|
17
16
|
statsample-bivariate-extension gem.
|
@@ -32,17 +31,11 @@ scales using factor analysis and correlations, if you want it.
|
|
32
31
|
- Graphics: Histogram, Boxplot and Scatterplot.
|
33
32
|
MSG
|
34
33
|
|
35
|
-
POSTINSTALL = <<MSG
|
34
|
+
Statsample::POSTINSTALL = <<MSG
|
36
35
|
***************************************************
|
37
36
|
|
38
37
|
Thanks for installing statsample.
|
39
38
|
|
40
|
-
On *nix, you could install statsample-optimization
|
41
|
-
to retrieve gems gsl, statistics2 and a C extension
|
42
|
-
to speed some methods.
|
43
|
-
|
44
|
-
$ [sudo] gem install statsample-optimization
|
45
|
-
|
46
39
|
*****************************************************
|
47
40
|
MSG
|
48
41
|
|
@@ -56,8 +49,8 @@ Gem::Specification.new do |s|
|
|
56
49
|
s.email = ["clbustos@gmail.com", "carlos@onox.com.br"]
|
57
50
|
|
58
51
|
s.summary = "A suite for basic and advanced statistics on Ruby"
|
59
|
-
s.description = DESCRIPTION
|
60
|
-
s.post_install_message = POSTINSTALL
|
52
|
+
s.description = Statsample::DESCRIPTION
|
53
|
+
s.post_install_message = Statsample::POSTINSTALL
|
61
54
|
|
62
55
|
s.rdoc_options = ["--main", "README.md"]
|
63
56
|
s.extra_rdoc_files = ["History.txt", "LICENSE.txt", "README.md", "references.txt"]
|
@@ -67,22 +60,25 @@ Gem::Specification.new do |s|
|
|
67
60
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
68
61
|
s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
|
69
62
|
|
70
|
-
s.add_runtime_dependency '
|
63
|
+
s.add_runtime_dependency 'daru', '~> 0.1'
|
64
|
+
s.add_runtime_dependency 'spreadsheet', '~> 1.0.3'
|
71
65
|
s.add_runtime_dependency 'reportbuilder', '~> 1.4'
|
72
66
|
s.add_runtime_dependency 'minimization', '~> 0.2'
|
73
67
|
s.add_runtime_dependency 'dirty-memoize', '~> 0.0.4'
|
74
|
-
s.add_runtime_dependency 'extendmatrix', '~> 0.
|
68
|
+
s.add_runtime_dependency 'extendmatrix', '~> 0.4'
|
75
69
|
s.add_runtime_dependency 'rserve-client', '~> 0.3'
|
76
|
-
s.add_runtime_dependency 'rubyvis', '~> 0.
|
70
|
+
s.add_runtime_dependency 'rubyvis', '~> 0.6.1'
|
77
71
|
s.add_runtime_dependency 'distribution', '~> 0.7'
|
78
72
|
s.add_runtime_dependency 'awesome_print', '~> 1.6'
|
79
73
|
|
80
|
-
s.add_development_dependency 'bundler', '~> 1.
|
74
|
+
s.add_development_dependency 'bundler', '~> 1.10'
|
81
75
|
s.add_development_dependency 'rake', '~> 10.4'
|
82
76
|
s.add_development_dependency 'rdoc', '~> 4.2'
|
83
77
|
s.add_development_dependency 'shoulda', '~> 3.5'
|
84
78
|
s.add_development_dependency 'shoulda-matchers', '~> 2.2'
|
85
|
-
s.add_development_dependency 'minitest', '~> 5.
|
79
|
+
s.add_development_dependency 'minitest', '~> 5.7'
|
86
80
|
s.add_development_dependency 'gettext', '~> 3.1'
|
87
81
|
s.add_development_dependency 'mocha', '~> 1.1'
|
82
|
+
s.add_development_dependency 'nmatrix', '~> 0.1.0'
|
83
|
+
s.add_development_dependency 'gsl-nmatrix', '~> 1.17.0'
|
88
84
|
end
|
data/test/helpers_tests.rb
CHANGED
@@ -33,7 +33,7 @@ module Minitest
|
|
33
33
|
def assert_similar_vector(exp, obs, delta = 1e-10, msg = nil)
|
34
34
|
msg ||= "Different vectors #{exp} - #{obs}"
|
35
35
|
assert_equal(exp.size, obs.size)
|
36
|
-
exp.
|
36
|
+
exp.to_a.each_with_index {|v, i|
|
37
37
|
assert_in_delta(v, obs[i], delta)
|
38
38
|
}
|
39
39
|
end
|
data/test/test_analysis.rb
CHANGED
@@ -39,7 +39,7 @@ class StatsampleAnalysisTestCase < Minitest::Test
|
|
39
39
|
should 'to_text returns the same as a normal ReportBuilder object' do
|
40
40
|
rb = ReportBuilder.new(name: :test)
|
41
41
|
section = ReportBuilder::Section.new(name: 'first')
|
42
|
-
a = [1, 2, 3]
|
42
|
+
a = Daru::Vector.new([1, 2, 3])
|
43
43
|
section.add('first')
|
44
44
|
section.add(a)
|
45
45
|
rb.add(section)
|
@@ -98,8 +98,8 @@ class StatsampleAnalysisTestCase < Minitest::Test
|
|
98
98
|
end
|
99
99
|
should 'attach() allows to call objects on objects which respond to fields' do
|
100
100
|
an = Statsample::Analysis::Suite.new(:summary)
|
101
|
-
ds = {
|
102
|
-
ds.expects(:
|
101
|
+
ds = { :x => stub(mean: 10), :y => stub(mean: 12) }
|
102
|
+
ds.expects(:vectors).returns([:x, :y]).at_least_once
|
103
103
|
an.attach(ds)
|
104
104
|
assert_equal(10, an.x.mean)
|
105
105
|
assert_equal(12, an.y.mean)
|
@@ -109,10 +109,10 @@ class StatsampleAnalysisTestCase < Minitest::Test
|
|
109
109
|
end
|
110
110
|
should 'attached objects should be called LIFO' do
|
111
111
|
an = Statsample::Analysis::Suite.new(:summary)
|
112
|
-
ds1 = {
|
113
|
-
ds1.expects(:
|
114
|
-
ds2 = {
|
115
|
-
ds2.expects(:
|
112
|
+
ds1 = { :x => stub(mean: 100), :y => stub(mean: 120), :z => stub(mean: 13) }
|
113
|
+
ds1.expects(:vectors).returns([:x, :y, :z]).at_least_once
|
114
|
+
ds2 = { :x => stub(mean: 10), :y => stub(mean: 12) }
|
115
|
+
ds2.expects(:vectors).returns([:x, :y]).at_least_once
|
116
116
|
an.attach(ds1)
|
117
117
|
an.attach(ds2)
|
118
118
|
assert_equal(10, an.x.mean)
|
@@ -122,10 +122,10 @@ class StatsampleAnalysisTestCase < Minitest::Test
|
|
122
122
|
|
123
123
|
should 'detach() without arguments drop latest object' do
|
124
124
|
an = Statsample::Analysis::Suite.new(:summary)
|
125
|
-
ds1 = {
|
126
|
-
ds1.expects(:
|
127
|
-
ds2 = {
|
128
|
-
ds2.expects(:
|
125
|
+
ds1 = { :x => stub(mean: 100), :y => stub(mean: 120), :z => stub(mean: 13) }
|
126
|
+
ds1.expects(:vectors).returns([:x, :y, :z]).at_least_once
|
127
|
+
ds2 = { :x => stub(mean: 10), :y => stub(mean: 12) }
|
128
|
+
ds2.expects(:vectors).returns([:x, :y]).at_least_once
|
129
129
|
an.attach(ds1)
|
130
130
|
an.attach(ds2)
|
131
131
|
assert_equal(10, an.x.mean)
|
@@ -134,12 +134,12 @@ class StatsampleAnalysisTestCase < Minitest::Test
|
|
134
134
|
end
|
135
135
|
should 'detach() with argument drop select object' do
|
136
136
|
an = Statsample::Analysis::Suite.new(:summary)
|
137
|
-
ds1 = {
|
138
|
-
ds1.expects(:
|
139
|
-
ds2 = {
|
140
|
-
ds2.expects(:
|
141
|
-
ds3 = {
|
142
|
-
ds3.expects(:
|
137
|
+
ds1 = { :x => 1 }
|
138
|
+
ds1.expects(:vectors).returns([:x]).at_least_once
|
139
|
+
ds2 = { :x => 2, :y => 3 }
|
140
|
+
ds2.expects(:vectors).returns([:x, :y]).at_least_once
|
141
|
+
ds3 = { :y => 4 }
|
142
|
+
ds3.expects(:vectors).returns([:y]).at_least_once
|
143
143
|
|
144
144
|
an.attach(ds3)
|
145
145
|
an.attach(ds2)
|
data/test/test_anova_contrast.rb
CHANGED
@@ -2,12 +2,12 @@ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
|
2
2
|
class StatsampleAnovaContrastTestCase < Minitest::Test
|
3
3
|
context(Statsample::Anova::Contrast) do
|
4
4
|
setup do
|
5
|
-
constant
|
6
|
-
frequent
|
7
|
-
infrequent = [15, 16, 17, 16, 16]
|
8
|
-
never
|
9
|
-
@vectors
|
10
|
-
@c
|
5
|
+
constant = Daru::Vector.new([12, 13, 11, 12, 12])
|
6
|
+
frequent = Daru::Vector.new([9, 10, 9, 13, 14])
|
7
|
+
infrequent = Daru::Vector.new([15, 16, 17, 16, 16])
|
8
|
+
never = Daru::Vector.new([17, 18, 12, 18, 20])
|
9
|
+
@vectors = [constant, frequent, infrequent, never]
|
10
|
+
@c = Statsample::Anova::Contrast.new(vectors: @vectors)
|
11
11
|
end
|
12
12
|
should 'return correct value using c' do
|
13
13
|
@c.c([1, -1.quo(3), -1.quo(3), -1.quo(3)])
|
@@ -4,14 +4,14 @@ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
|
4
4
|
class StatsampleAnovaTwoWayWithVectorsTestCase < Minitest::Test
|
5
5
|
context(Statsample::Anova::TwoWayWithVectors) do
|
6
6
|
setup do
|
7
|
-
@pa = [5, 4, 3, 4, 2, 18, 19, 14, 12, 15, 6, 7, 5, 8, 4, 6, 9, 5, 9, 3]
|
8
|
-
@pa.
|
9
|
-
@a = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
|
10
|
-
@a.labels = { 0 => '0%', 1 => '35%' }
|
11
|
-
@a.
|
12
|
-
@b = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
|
13
|
-
@b.labels = { 0 => 'Young', 1 => 'Older' }
|
14
|
-
@b.
|
7
|
+
@pa = Daru::Vector.new [5, 4, 3, 4, 2, 18, 19, 14, 12, 15, 6, 7, 5, 8, 4, 6, 9, 5, 9, 3]
|
8
|
+
@pa.rename 'Passive Avoidance'
|
9
|
+
@a = Daru::Vector.new [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
|
10
|
+
# @a.labels = { 0 => '0%', 1 => '35%' }
|
11
|
+
@a.rename 'Diet'
|
12
|
+
@b = Daru::Vector.new [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
|
13
|
+
# @b.labels = { 0 => 'Young', 1 => 'Older' }
|
14
|
+
@b.rename 'Age'
|
15
15
|
@anova = Statsample::Anova::TwoWayWithVectors.new(a: @a, b: @b, dependent: @pa)
|
16
16
|
end
|
17
17
|
should 'Statsample::Anova respond to #twoway_with_vectors' do
|
@@ -3,9 +3,9 @@ class StatsampleAnovaOneWayWithVectorsTestCase < Minitest::Test
|
|
3
3
|
context(Statsample::Anova::OneWayWithVectors) do
|
4
4
|
context('when initializing') do
|
5
5
|
setup do
|
6
|
-
@v1 = 10.times.map { rand(100) }
|
7
|
-
@v2 = 10.times.map { rand(100) }
|
8
|
-
@v3 = 10.times.map { rand(100) }
|
6
|
+
@v1 = Daru::Vector.new(10.times.map { rand(100) })
|
7
|
+
@v2 = Daru::Vector.new(10.times.map { rand(100) })
|
8
|
+
@v3 = Daru::Vector.new(10.times.map { rand(100) })
|
9
9
|
end
|
10
10
|
should 'be the same using [] or args*' do
|
11
11
|
a1 = Statsample::Anova::OneWayWithVectors.new(@v1, @v2, @v3)
|
@@ -28,9 +28,9 @@ class StatsampleAnovaOneWayWithVectorsTestCase < Minitest::Test
|
|
28
28
|
end
|
29
29
|
end
|
30
30
|
setup do
|
31
|
-
@v1 = [3, 3, 2, 3, 6]
|
32
|
-
@v2 = [7, 6, 5, 6, 7]
|
33
|
-
@v3 = [9, 8, 9, 7, 8]
|
31
|
+
@v1 = Daru::Vector.new([3, 3, 2, 3, 6])
|
32
|
+
@v2 = Daru::Vector.new([7, 6, 5, 6, 7])
|
33
|
+
@v3 = Daru::Vector.new([9, 8, 9, 7, 8])
|
34
34
|
@name = 'Anova testing'
|
35
35
|
@anova = Statsample::Anova::OneWayWithVectors.new(@v1, @v2, @v3, name: @name)
|
36
36
|
end
|
@@ -66,10 +66,10 @@ class StatsampleAnovaOneWayWithVectorsTestCase < Minitest::Test
|
|
66
66
|
assert_in_delta(@anova.sst, @anova.sswg + @anova.ssbg, 0.00001)
|
67
67
|
end
|
68
68
|
should 'df total equal to number of n-1' do
|
69
|
-
assert_equal(@v1.
|
69
|
+
assert_equal(@v1.size + @v2.size + @v3.size - 1, @anova.df_total)
|
70
70
|
end
|
71
71
|
should 'df wg equal to number of n-k' do
|
72
|
-
assert_equal(@v1.
|
72
|
+
assert_equal(@v1.size + @v2.size + @v3.size - 3, @anova.df_wg)
|
73
73
|
end
|
74
74
|
should 'df bg equal to number of k-1' do
|
75
75
|
assert_equal(2, @anova.df_bg)
|
@@ -4,11 +4,11 @@ class StatsampleBartlettSphericityTestCase < Minitest::Test
|
|
4
4
|
include Statsample::Test
|
5
5
|
context Statsample::Test::BartlettSphericity do
|
6
6
|
setup do
|
7
|
-
@v1 = [1, 2, 3, 4, 7, 8, 9, 10, 14, 15, 20, 50, 60, 70]
|
8
|
-
@v2 = [5, 6, 11, 12, 13, 16, 17, 18, 19, 20, 30, 0, 0, 0]
|
9
|
-
@v3 = [10, 3, 20, 30, 40, 50, 80, 10, 20, 30, 40, 2, 3, 4]
|
7
|
+
@v1 = Daru::Vector.new([1, 2, 3, 4, 7, 8, 9, 10, 14, 15, 20, 50, 60, 70])
|
8
|
+
@v2 = Daru::Vector.new([5, 6, 11, 12, 13, 16, 17, 18, 19, 20, 30, 0, 0, 0])
|
9
|
+
@v3 = Daru::Vector.new([10, 3, 20, 30, 40, 50, 80, 10, 20, 30, 40, 2, 3, 4])
|
10
10
|
# KMO: 0.490
|
11
|
-
ds = {
|
11
|
+
ds = Daru::DataFrame.new({ :v1 => @v1, :v2 => @v2, :v3 => @v3 })
|
12
12
|
cor = Statsample::Bivariate.correlation_matrix(ds)
|
13
13
|
@bs = Statsample::Test::BartlettSphericity.new(cor, 14)
|
14
14
|
end
|
data/test/test_bivariate.rb
CHANGED
@@ -1,38 +1,38 @@
|
|
1
1
|
require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
2
2
|
class StatsampleBivariateTestCase < Minitest::Test
|
3
3
|
should 'method sum of squares should be correct' do
|
4
|
-
v1 = [1, 2, 3, 4, 5, 6]
|
5
|
-
v2 = [6, 2, 4, 10, 12, 8]
|
4
|
+
v1 = Daru::Vector.new([1, 2, 3, 4, 5, 6])
|
5
|
+
v2 = Daru::Vector.new([6, 2, 4, 10, 12, 8])
|
6
6
|
assert_equal(23.0, Statsample::Bivariate.sum_of_squares(v1, v2))
|
7
7
|
end
|
8
8
|
should_with_gsl 'return same covariance with ruby and gls implementation' do
|
9
|
-
v1 = 20.times.collect { |_a| rand }
|
10
|
-
v2 = 20.times.collect { |_a| rand }
|
9
|
+
v1 = Daru::Vector.new(20.times.collect { |_a| rand })
|
10
|
+
v2 = Daru::Vector.new(20.times.collect { |_a| rand })
|
11
11
|
assert_in_delta(Statsample::Bivariate.covariance(v1, v2), Statsample::Bivariate.covariance_slow(v1, v2), 0.001)
|
12
12
|
end
|
13
13
|
|
14
14
|
should_with_gsl 'return same correlation with ruby and gls implementation' do
|
15
|
-
v1 = 20.times.collect { |_a| rand }
|
16
|
-
v2 = 20.times.collect { |_a| rand }
|
15
|
+
v1 = Daru::Vector.new(20.times.collect { |_a| rand })
|
16
|
+
v2 = Daru::Vector.new(20.times.collect { |_a| rand })
|
17
17
|
|
18
|
-
assert_in_delta(GSL::Stats.correlation(v1.
|
18
|
+
assert_in_delta(GSL::Stats.correlation(v1.to_gsl, v2.to_gsl), Statsample::Bivariate.pearson_slow(v1, v2), 1e-10)
|
19
19
|
end
|
20
20
|
should 'return correct pearson correlation' do
|
21
|
-
v1 = [6, 5, 4, 7, 8, 4, 3, 2]
|
22
|
-
v2 = [2, 3, 7, 8, 6, 4, 3, 2]
|
21
|
+
v1 = Daru::Vector.new([6, 5, 4, 7, 8, 4, 3, 2])
|
22
|
+
v2 = Daru::Vector.new([2, 3, 7, 8, 6, 4, 3, 2])
|
23
23
|
assert_in_delta(0.525, Statsample::Bivariate.pearson(v1, v2), 0.001)
|
24
24
|
assert_in_delta(0.525, Statsample::Bivariate.pearson_slow(v1, v2), 0.001)
|
25
25
|
|
26
|
-
v3 = [6, 2, 1000, 1000, 5, 4, 7, 8, 4, 3, 2, nil]
|
27
|
-
v4 = [2, nil, nil, nil, 3, 7, 8, 6, 4, 3, 2, 500]
|
26
|
+
v3 = Daru::Vector.new([6, 2, 1000, 1000, 5, 4, 7, 8, 4, 3, 2, nil])
|
27
|
+
v4 = Daru::Vector.new([2, nil, nil, nil, 3, 7, 8, 6, 4, 3, 2, 500])
|
28
28
|
assert_in_delta(0.525, Statsample::Bivariate.pearson(v3, v4), 0.001)
|
29
29
|
# Test ruby method
|
30
30
|
v3a, v4a = Statsample.only_valid v3, v4
|
31
31
|
assert_in_delta(0.525, Statsample::Bivariate.pearson_slow(v3a, v4a), 0.001)
|
32
32
|
end
|
33
33
|
should 'return correct values for t_pearson and prop_pearson' do
|
34
|
-
v1 = [6, 5, 4, 7, 8, 4, 3, 2]
|
35
|
-
v2 = [2, 3, 7, 8, 6, 4, 3, 2]
|
34
|
+
v1 = Daru::Vector.new([6, 5, 4, 7, 8, 4, 3, 2])
|
35
|
+
v2 = Daru::Vector.new([2, 3, 7, 8, 6, 4, 3, 2])
|
36
36
|
r = Statsample::Bivariate::Pearson.new(v1, v2)
|
37
37
|
assert_in_delta(0.525, r.r, 0.001)
|
38
38
|
assert_in_delta(Statsample::Bivariate.t_pearson(v1, v2), r.t, 0.001)
|
@@ -40,11 +40,11 @@ class StatsampleBivariateTestCase < Minitest::Test
|
|
40
40
|
assert(r.summary.size > 0)
|
41
41
|
end
|
42
42
|
should 'return correct correlation_matrix with nils values' do
|
43
|
-
v1 = [6, 5, 4, 7, 8, 4, 3, 2]
|
44
|
-
v2 = [2, 3, 7, 8, 6, 4, 3, 2]
|
45
|
-
v3 = [6, 2, 1000, 1000, 5, 4, 7, 8]
|
46
|
-
v4 = [2, nil, nil, nil, 3, 7, 8, 6]
|
47
|
-
ds = {
|
43
|
+
v1 = Daru::Vector.new([6, 5, 4, 7, 8, 4, 3, 2])
|
44
|
+
v2 = Daru::Vector.new([2, 3, 7, 8, 6, 4, 3, 2])
|
45
|
+
v3 = Daru::Vector.new([6, 2, 1000, 1000, 5, 4, 7, 8])
|
46
|
+
v4 = Daru::Vector.new([2, nil, nil, nil, 3, 7, 8, 6])
|
47
|
+
ds = Daru::DataFrame.new({ :v1 => v1, :v2 => v2, :v3 => v3, :v4 => v4 })
|
48
48
|
c = proc { |n1, n2| Statsample::Bivariate.pearson(n1, n2) }
|
49
49
|
expected = Matrix[[c.call(v1, v1), c.call(v1, v2), c.call(v1, v3), c.call(v1, v4)], [c.call(v2, v1), c.call(v2, v2), c.call(v2, v3), c.call(v2, v4)], [c.call(v3, v1), c.call(v3, v2), c.call(v3, v3), c.call(v3, v4)],
|
50
50
|
[c.call(v4, v1), c.call(v4, v2), c.call(v4, v3), c.call(v4, v4)]
|
@@ -61,13 +61,13 @@ class StatsampleBivariateTestCase < Minitest::Test
|
|
61
61
|
end
|
62
62
|
should_with_gsl 'return same values for optimized and pairwise covariance matrix' do
|
63
63
|
cases = 100
|
64
|
-
v1 =
|
65
|
-
v2 =
|
66
|
-
v3 =
|
67
|
-
v4 =
|
68
|
-
v5 =
|
64
|
+
v1 = Daru::Vector.new_with_size(cases) { rand }
|
65
|
+
v2 = Daru::Vector.new_with_size(cases) { rand }
|
66
|
+
v3 = Daru::Vector.new_with_size(cases) { rand }
|
67
|
+
v4 = Daru::Vector.new_with_size(cases) { rand }
|
68
|
+
v5 = Daru::Vector.new_with_size(cases) { rand }
|
69
69
|
|
70
|
-
ds = {
|
70
|
+
ds = Daru::DataFrame.new({ :v1 => v1, :v2 => v2, :v3 => v3, :v4 => v4, :v5 => v5 })
|
71
71
|
|
72
72
|
cor_opt = Statsample::Bivariate.covariance_matrix_optimized(ds)
|
73
73
|
|
@@ -76,13 +76,14 @@ class StatsampleBivariateTestCase < Minitest::Test
|
|
76
76
|
end
|
77
77
|
should_with_gsl 'return same values for optimized and pairwise correlation matrix' do
|
78
78
|
cases = 100
|
79
|
-
v1 =
|
80
|
-
v2 =
|
81
|
-
v3 =
|
82
|
-
v4 =
|
83
|
-
v5 =
|
79
|
+
v1 = Daru::Vector.new_with_size(cases) { rand }
|
80
|
+
v2 = Daru::Vector.new_with_size(cases) { rand }
|
81
|
+
v3 = Daru::Vector.new_with_size(cases) { rand }
|
82
|
+
v4 = Daru::Vector.new_with_size(cases) { rand }
|
83
|
+
v5 = Daru::Vector.new_with_size(cases) { rand }
|
84
84
|
|
85
|
-
ds = {
|
85
|
+
ds = Daru::DataFrame.new({
|
86
|
+
:v1 => v1, :v2 => v2, :v3 => v3, :v4 => v4, :v5 => v5 })
|
86
87
|
|
87
88
|
cor_opt = Statsample::Bivariate.correlation_matrix_optimized(ds)
|
88
89
|
|
@@ -90,11 +91,11 @@ class StatsampleBivariateTestCase < Minitest::Test
|
|
90
91
|
assert_equal_matrix(cor_opt, cor_pw, 1e-15)
|
91
92
|
end
|
92
93
|
should 'return correct correlation_matrix without nils values' do
|
93
|
-
v1 = [6, 5, 4, 7, 8, 4, 3, 2]
|
94
|
-
v2 = [2, 3, 7, 8, 6, 4, 3, 2]
|
95
|
-
v3 = [6, 2, 1000, 1000, 5, 4, 7, 8]
|
96
|
-
v4 = [2, 4, 6, 7, 3, 7, 8, 6]
|
97
|
-
ds = {
|
94
|
+
v1 = Daru::Vector.new([6, 5, 4, 7, 8, 4, 3, 2])
|
95
|
+
v2 = Daru::Vector.new([2, 3, 7, 8, 6, 4, 3, 2])
|
96
|
+
v3 = Daru::Vector.new([6, 2, 1000, 1000, 5, 4, 7, 8])
|
97
|
+
v4 = Daru::Vector.new([2, 4, 6, 7, 3, 7, 8, 6])
|
98
|
+
ds = Daru::DataFrame.new({ :v1 => v1, :v2 => v2, :v3 => v3, :v4 => v4 })
|
98
99
|
c = proc { |n1, n2| Statsample::Bivariate.pearson(n1, n2) }
|
99
100
|
expected = Matrix[[c.call(v1, v1), c.call(v1, v2), c.call(v1, v3), c.call(v1, v4)], [c.call(v2, v1), c.call(v2, v2), c.call(v2, v3), c.call(v2, v4)], [c.call(v3, v1), c.call(v3, v2), c.call(v3, v3), c.call(v3, v4)],
|
100
101
|
[c.call(v4, v1), c.call(v4, v2), c.call(v4, v3), c.call(v4, v4)]
|
@@ -129,25 +130,25 @@ class StatsampleBivariateTestCase < Minitest::Test
|
|
129
130
|
end
|
130
131
|
|
131
132
|
should "return correct value for Spearman's rho" do
|
132
|
-
v1 = [86, 97, 99, 100, 101, 103, 106, 110, 112, 113]
|
133
|
-
v2 = [0, 20, 28, 27, 50, 29, 7, 17, 6, 12]
|
133
|
+
v1 =Daru::Vector.new( [86, 97, 99, 100, 101, 103, 106, 110, 112, 113])
|
134
|
+
v2 =Daru::Vector.new( [0, 20, 28, 27, 50, 29, 7, 17, 6, 12])
|
134
135
|
assert_in_delta(-0.175758, Statsample::Bivariate.spearman(v1, v2), 0.0001)
|
135
136
|
end
|
136
137
|
should 'return correct value for point_biserial correlation' do
|
137
|
-
c = [1, 3, 5, 6, 7, 100, 200, 300, 400, 300]
|
138
|
-
d = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]
|
138
|
+
c = Daru::Vector.new([1, 3, 5, 6, 7, 100, 200, 300, 400, 300])
|
139
|
+
d = Daru::Vector.new([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
|
139
140
|
assert_raises TypeError do
|
140
141
|
Statsample::Bivariate.point_biserial(c, d)
|
141
142
|
end
|
142
143
|
assert_in_delta(Statsample::Bivariate.point_biserial(d, c), Statsample::Bivariate.pearson(d, c), 0.0001)
|
143
144
|
end
|
144
145
|
should 'return correct value for tau_a and tau_b' do
|
145
|
-
v1 = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
|
146
|
-
v2 = [1, 3, 4, 5, 7, 8, 2, 9, 10, 6, 11]
|
146
|
+
v1 = Daru::Vector.new([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
|
147
|
+
v2 = Daru::Vector.new([1, 3, 4, 5, 7, 8, 2, 9, 10, 6, 11])
|
147
148
|
assert_in_delta(0.6727, Statsample::Bivariate.tau_a(v1, v2), 0.001)
|
148
149
|
assert_in_delta(0.6727, Statsample::Bivariate.tau_b((Statsample::Crosstab.new(v1, v2).to_matrix)), 0.001)
|
149
|
-
v1 = [12, 14, 14, 17, 19, 19, 19, 19, 19, 20, 21, 21, 21, 21, 21, 22, 23, 24, 24, 24, 26, 26, 27]
|
150
|
-
v2 = [11, 4, 4, 2, 0, 0, 0, 0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0]
|
150
|
+
v1 = Daru::Vector.new([12, 14, 14, 17, 19, 19, 19, 19, 19, 20, 21, 21, 21, 21, 21, 22, 23, 24, 24, 24, 26, 26, 27])
|
151
|
+
v2 = Daru::Vector.new([11, 4, 4, 2, 0, 0, 0, 0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0])
|
151
152
|
assert_in_delta(-0.376201540231705, Statsample::Bivariate.tau_b(Statsample::Crosstab.new(v1, v2).to_matrix), 0.001)
|
152
153
|
end
|
153
154
|
should 'return correct value for gamma correlation' do
|
@@ -156,4 +157,8 @@ class StatsampleBivariateTestCase < Minitest::Test
|
|
156
157
|
m2 = Matrix[[15, 12, 6, 5], [12, 8, 10, 8], [4, 6, 9, 10]]
|
157
158
|
assert_in_delta(0.349, Statsample::Bivariate.gamma(m2), 0.001)
|
158
159
|
end
|
160
|
+
|
161
|
+
should 'return correct residuals' do
|
162
|
+
# TODO: test Statsample::Bivariate.residuals
|
163
|
+
end
|
159
164
|
end
|
data/test/test_codification.rb
CHANGED
@@ -1,33 +1,33 @@
|
|
1
1
|
require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
2
2
|
class StatsampleCodificationTestCase < Minitest::Test
|
3
3
|
def initialize(*args)
|
4
|
-
v1 = %w(run walk,run walking running sleep sleeping,dreaming sleep,dream)
|
4
|
+
v1 = Daru::Vector.new(%w(run walk,run walking running sleep sleeping,dreaming sleep,dream))
|
5
5
|
@dict = { 'run' => 'r', 'walk' => 'w', 'walking' => 'w', 'running' => 'r', 'sleep' => 's', 'sleeping' => 's', 'dream' => 'd', 'dreaming' => 'd' }
|
6
|
-
@ds = {
|
6
|
+
@ds = Daru::DataFrame.new({ :v1 => v1 })
|
7
7
|
super
|
8
8
|
end
|
9
9
|
|
10
10
|
def test_create_hash
|
11
11
|
expected_keys_v1 = %w(run walk walking running sleep sleeping dream dreaming).sort
|
12
|
-
hash = Statsample::Codification.create_hash(@ds, [
|
13
|
-
assert_equal([
|
14
|
-
assert_equal(expected_keys_v1, hash[
|
15
|
-
assert_equal(expected_keys_v1, hash[
|
12
|
+
hash = Statsample::Codification.create_hash(@ds, [:v1])
|
13
|
+
assert_equal([:v1], hash.keys)
|
14
|
+
assert_equal(expected_keys_v1, hash[:v1].keys.sort)
|
15
|
+
assert_equal(expected_keys_v1, hash[:v1].values.sort)
|
16
16
|
end
|
17
17
|
|
18
18
|
def test_create_excel
|
19
19
|
filename = Dir.tmpdir + '/test_excel' + Time.now.to_s + '.xls'
|
20
20
|
# filename = Tempfile.new("test_codification_"+Time.now().to_s)
|
21
21
|
Statsample::Codification.create_excel(@ds, ['v1'], filename)
|
22
|
-
field = (['v1'] * 8)
|
23
|
-
keys = %w(dream dreaming run running sleep sleeping walk walking)
|
24
|
-
ds =
|
25
|
-
assert_equal(field, ds[
|
26
|
-
assert_equal(keys, ds[
|
27
|
-
assert_equal(keys, ds[
|
22
|
+
field = Daru::Vector.new(['v1'] * 8, name: :field)
|
23
|
+
keys = Daru::Vector.new(%w(dream dreaming run running sleep sleeping walk walking))
|
24
|
+
ds = Daru::DataFrame.from_excel(filename)
|
25
|
+
assert_equal(field, ds[:field])
|
26
|
+
assert_equal(keys, ds[:original])
|
27
|
+
assert_equal(keys, ds[:recoded])
|
28
28
|
hash = Statsample::Codification.excel_to_recoded_hash(filename)
|
29
|
-
assert_equal(keys.
|
30
|
-
assert_equal(keys.
|
29
|
+
assert_equal(keys.to_a, hash[:v1].keys.sort)
|
30
|
+
assert_equal(keys.to_a, hash[:v1].values.sort)
|
31
31
|
end
|
32
32
|
|
33
33
|
def test_create_yaml
|
@@ -35,44 +35,44 @@ class StatsampleCodificationTestCase < Minitest::Test
|
|
35
35
|
Statsample::Codification.create_yaml(@ds, [])
|
36
36
|
end
|
37
37
|
expected_keys_v1 = %w(run walk walking running sleep sleeping dream dreaming).sort
|
38
|
-
yaml_hash = Statsample::Codification.create_yaml(@ds, [
|
38
|
+
yaml_hash = Statsample::Codification.create_yaml(@ds, [:v1])
|
39
39
|
h = YAML.load(yaml_hash)
|
40
|
-
assert_equal([
|
41
|
-
assert_equal(expected_keys_v1, h[
|
40
|
+
assert_equal([:v1], h.keys)
|
41
|
+
assert_equal(expected_keys_v1, h[:v1].keys.sort)
|
42
42
|
tf = Tempfile.new('test_codification')
|
43
|
-
yaml_hash = Statsample::Codification.create_yaml(@ds, [
|
43
|
+
yaml_hash = Statsample::Codification.create_yaml(@ds, [:v1], tf, Statsample::SPLIT_TOKEN)
|
44
44
|
tf.close
|
45
45
|
tf.open
|
46
46
|
h = YAML.load(tf)
|
47
|
-
assert_equal([
|
48
|
-
assert_equal(expected_keys_v1, h[
|
47
|
+
assert_equal([:v1], h.keys)
|
48
|
+
assert_equal(expected_keys_v1, h[:v1].keys.sort)
|
49
49
|
tf.close(true)
|
50
50
|
end
|
51
51
|
|
52
52
|
def test_recodification
|
53
53
|
expected = [['r'], %w(w r), ['w'], ['r'], ['s'], %w(s d), %w(s d)]
|
54
|
-
assert_equal(expected, Statsample::Codification.recode_vector(@ds[
|
55
|
-
v2 = ['run', 'walk,dreaming', nil, 'walk,dream,dreaming,walking']
|
54
|
+
assert_equal(expected, Statsample::Codification.recode_vector(@ds[:v1], @dict))
|
55
|
+
v2 = Daru::Vector.new(['run', 'walk,dreaming', nil, 'walk,dream,dreaming,walking'])
|
56
56
|
expected = [['r'], %w(w d), nil, %w(w d)]
|
57
57
|
assert_equal(expected, Statsample::Codification.recode_vector(v2, @dict))
|
58
58
|
end
|
59
59
|
|
60
60
|
def test_recode_dataset_simple
|
61
|
-
Statsample::Codification.recode_dataset_simple!(@ds,
|
62
|
-
expected_vector = ['r', 'w,r', 'w', 'r', 's', 's,d', 's,d']
|
63
|
-
assert_not_equal(expected_vector, @ds[
|
64
|
-
assert_equal(expected_vector, @ds[
|
61
|
+
Statsample::Codification.recode_dataset_simple!(@ds, :v1 => @dict)
|
62
|
+
expected_vector = Daru::Vector.new(['r', 'w,r', 'w', 'r', 's', 's,d', 's,d'])
|
63
|
+
assert_not_equal(expected_vector, @ds[:v1])
|
64
|
+
assert_equal(expected_vector, @ds[:v1_recoded])
|
65
65
|
end
|
66
66
|
|
67
67
|
def test_recode_dataset_split
|
68
|
-
Statsample::Codification.recode_dataset_split!(@ds,
|
68
|
+
Statsample::Codification.recode_dataset_split!(@ds, :v1 => @dict)
|
69
69
|
e = {}
|
70
|
-
e['r'] = [1, 1, 0, 1, 0, 0, 0]
|
71
|
-
e['w'] = [0, 1, 1, 0, 0, 0, 0]
|
72
|
-
e['s'] = [0, 0, 0, 0, 1, 1, 1]
|
73
|
-
e['d'] = [0, 0, 0, 0, 0, 1, 1]
|
74
|
-
e.each{|k, expected|
|
75
|
-
assert_equal(expected, @ds['v1_' + k], "Error on key #{k}")
|
70
|
+
e['r'] = Daru::Vector.new([1, 1, 0, 1, 0, 0, 0])
|
71
|
+
e['w'] = Daru::Vector.new([0, 1, 1, 0, 0, 0, 0])
|
72
|
+
e['s'] = Daru::Vector.new([0, 0, 0, 0, 1, 1, 1])
|
73
|
+
e['d'] = Daru::Vector.new([0, 0, 0, 0, 0, 1, 1])
|
74
|
+
e.each { |k, expected|
|
75
|
+
assert_equal(expected, @ds[('v1_' + k).to_sym], "Error on key #{k}")
|
76
76
|
}
|
77
77
|
end
|
78
78
|
end
|