statsample 1.5.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.build.sh +15 -0
- data/.gitignore +1 -0
- data/.travis.yml +19 -7
- data/CONTRIBUTING.md +33 -0
- data/History.txt +5 -0
- data/README.md +41 -53
- data/benchmarks/correlation_matrix_15_variables.rb +6 -5
- data/benchmarks/correlation_matrix_5_variables.rb +6 -5
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +23 -26
- data/examples/boxplot.rb +17 -5
- data/examples/correlation_matrix.rb +36 -7
- data/examples/dataset.rb +25 -5
- data/examples/dominance_analysis.rb +8 -7
- data/examples/dominance_analysis_bootstrap.rb +16 -11
- data/examples/histogram.rb +16 -2
- data/examples/icc.rb +5 -6
- data/examples/levene.rb +17 -3
- data/examples/multiple_regression.rb +6 -3
- data/examples/parallel_analysis.rb +11 -6
- data/examples/polychoric.rb +26 -13
- data/examples/principal_axis.rb +8 -4
- data/examples/reliability.rb +10 -10
- data/examples/scatterplot.rb +8 -0
- data/examples/t_test.rb +7 -0
- data/examples/u_test.rb +10 -2
- data/examples/vector.rb +9 -6
- data/examples/velicer_map_test.rb +12 -8
- data/lib/statsample.rb +13 -47
- data/lib/statsample/analysis/suite.rb +1 -1
- data/lib/statsample/anova/oneway.rb +6 -6
- data/lib/statsample/anova/twoway.rb +26 -24
- data/lib/statsample/bivariate.rb +78 -61
- data/lib/statsample/bivariate/pearson.rb +2 -2
- data/lib/statsample/codification.rb +45 -32
- data/lib/statsample/converter/csv.rb +15 -53
- data/lib/statsample/converter/spss.rb +6 -5
- data/lib/statsample/converters.rb +50 -211
- data/lib/statsample/crosstab.rb +26 -25
- data/lib/statsample/daru.rb +117 -0
- data/lib/statsample/dataset.rb +70 -942
- data/lib/statsample/dominanceanalysis.rb +16 -17
- data/lib/statsample/dominanceanalysis/bootstrap.rb +26 -28
- data/lib/statsample/factor/parallelanalysis.rb +17 -19
- data/lib/statsample/factor/pca.rb +21 -20
- data/lib/statsample/factor/principalaxis.rb +3 -3
- data/lib/statsample/graph/boxplot.rb +8 -16
- data/lib/statsample/graph/histogram.rb +4 -4
- data/lib/statsample/graph/scatterplot.rb +8 -7
- data/lib/statsample/histogram.rb +128 -119
- data/lib/statsample/matrix.rb +20 -16
- data/lib/statsample/multiset.rb +39 -38
- data/lib/statsample/regression.rb +3 -3
- data/lib/statsample/regression/multiple.rb +8 -10
- data/lib/statsample/regression/multiple/alglibengine.rb +96 -89
- data/lib/statsample/regression/multiple/baseengine.rb +32 -32
- data/lib/statsample/regression/multiple/gslengine.rb +33 -36
- data/lib/statsample/regression/multiple/matrixengine.rb +7 -9
- data/lib/statsample/regression/multiple/rubyengine.rb +39 -41
- data/lib/statsample/reliability.rb +23 -25
- data/lib/statsample/reliability/icc.rb +8 -7
- data/lib/statsample/reliability/multiscaleanalysis.rb +14 -12
- data/lib/statsample/reliability/scaleanalysis.rb +58 -60
- data/lib/statsample/reliability/skillscaleanalysis.rb +34 -29
- data/lib/statsample/resample.rb +1 -1
- data/lib/statsample/shorthand.rb +29 -25
- data/lib/statsample/test/kolmogorovsmirnov.rb +5 -3
- data/lib/statsample/test/levene.rb +28 -27
- data/lib/statsample/test/t.rb +7 -9
- data/lib/statsample/test/umannwhitney.rb +28 -28
- data/lib/statsample/test/wilcoxonsignedrank.rb +45 -43
- data/lib/statsample/vector.rb +70 -1013
- data/lib/statsample/version.rb +1 -1
- data/statsample.gemspec +12 -16
- data/test/helpers_tests.rb +1 -1
- data/test/test_analysis.rb +17 -17
- data/test/test_anova_contrast.rb +6 -6
- data/test/test_anovatwowaywithdataset.rb +8 -8
- data/test/test_anovawithvectors.rb +8 -8
- data/test/test_awesome_print_bug.rb +1 -1
- data/test/test_bartlettsphericity.rb +4 -4
- data/test/test_bivariate.rb +48 -43
- data/test/test_codification.rb +33 -33
- data/test/test_crosstab.rb +9 -9
- data/test/test_dataset.rb +28 -458
- data/test/test_factor.rb +46 -38
- data/test/test_factor_pa.rb +22 -13
- data/test/test_ggobi.rb +4 -4
- data/test/test_gsl.rb +4 -4
- data/test/test_histogram.rb +3 -3
- data/test/test_matrix.rb +13 -13
- data/test/test_multiset.rb +103 -91
- data/test/test_regression.rb +57 -52
- data/test/test_reliability.rb +55 -45
- data/test/test_reliability_icc.rb +8 -8
- data/test/test_reliability_skillscale.rb +26 -24
- data/test/test_resample.rb +1 -1
- data/test/test_statistics.rb +3 -13
- data/test/test_stest.rb +9 -9
- data/test/test_stratified.rb +3 -3
- data/test/test_test_t.rb +12 -12
- data/test/test_umannwhitney.rb +2 -2
- data/test/test_vector.rb +76 -613
- data/test/test_wilcoxonsignedrank.rb +4 -4
- metadata +57 -28
- data/lib/statsample/rserve_extension.rb +0 -20
- data/lib/statsample/vector/gsl.rb +0 -106
- data/test/fixtures/repeated_fields.csv +0 -7
- data/test/fixtures/scientific_notation.csv +0 -4
- data/test/fixtures/test_csv.csv +0 -7
- data/test/fixtures/test_xls.xls +0 -0
- data/test/test_csv.rb +0 -63
- data/test/test_rserve_extension.rb +0 -42
- data/test/test_xls.rb +0 -52
data/lib/statsample/version.rb
CHANGED
data/statsample.gemspec
CHANGED
@@ -3,7 +3,7 @@ $:.unshift File.expand_path("../lib/", __FILE__)
|
|
3
3
|
require 'statsample/version'
|
4
4
|
require 'date'
|
5
5
|
|
6
|
-
DESCRIPTION = <<MSG
|
6
|
+
Statsample::DESCRIPTION = <<MSG
|
7
7
|
A suite for basic and advanced statistics on Ruby. Tested on CRuby 1.9.3, 2.0.0
|
8
8
|
and 2.1.1. See `.travis.yml` for more information.
|
9
9
|
|
@@ -11,7 +11,6 @@ Include:
|
|
11
11
|
|
12
12
|
- Descriptive statistics: frequencies, median, mean,
|
13
13
|
standard error, skew, kurtosis (and many others).
|
14
|
-
- Imports and exports datasets from and to Excel, CSV and plain text files.
|
15
14
|
- Correlations: Pearson's r, Spearman's rank correlation (rho), point biserial,
|
16
15
|
tau a, tau b and gamma. Tetrachoric and Polychoric correlation provides by
|
17
16
|
statsample-bivariate-extension gem.
|
@@ -32,17 +31,11 @@ scales using factor analysis and correlations, if you want it.
|
|
32
31
|
- Graphics: Histogram, Boxplot and Scatterplot.
|
33
32
|
MSG
|
34
33
|
|
35
|
-
POSTINSTALL = <<MSG
|
34
|
+
Statsample::POSTINSTALL = <<MSG
|
36
35
|
***************************************************
|
37
36
|
|
38
37
|
Thanks for installing statsample.
|
39
38
|
|
40
|
-
On *nix, you could install statsample-optimization
|
41
|
-
to retrieve gems gsl, statistics2 and a C extension
|
42
|
-
to speed some methods.
|
43
|
-
|
44
|
-
$ [sudo] gem install statsample-optimization
|
45
|
-
|
46
39
|
*****************************************************
|
47
40
|
MSG
|
48
41
|
|
@@ -56,8 +49,8 @@ Gem::Specification.new do |s|
|
|
56
49
|
s.email = ["clbustos@gmail.com", "carlos@onox.com.br"]
|
57
50
|
|
58
51
|
s.summary = "A suite for basic and advanced statistics on Ruby"
|
59
|
-
s.description = DESCRIPTION
|
60
|
-
s.post_install_message = POSTINSTALL
|
52
|
+
s.description = Statsample::DESCRIPTION
|
53
|
+
s.post_install_message = Statsample::POSTINSTALL
|
61
54
|
|
62
55
|
s.rdoc_options = ["--main", "README.md"]
|
63
56
|
s.extra_rdoc_files = ["History.txt", "LICENSE.txt", "README.md", "references.txt"]
|
@@ -67,22 +60,25 @@ Gem::Specification.new do |s|
|
|
67
60
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
68
61
|
s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
|
69
62
|
|
70
|
-
s.add_runtime_dependency '
|
63
|
+
s.add_runtime_dependency 'daru', '~> 0.1'
|
64
|
+
s.add_runtime_dependency 'spreadsheet', '~> 1.0.3'
|
71
65
|
s.add_runtime_dependency 'reportbuilder', '~> 1.4'
|
72
66
|
s.add_runtime_dependency 'minimization', '~> 0.2'
|
73
67
|
s.add_runtime_dependency 'dirty-memoize', '~> 0.0.4'
|
74
|
-
s.add_runtime_dependency 'extendmatrix', '~> 0.
|
68
|
+
s.add_runtime_dependency 'extendmatrix', '~> 0.4'
|
75
69
|
s.add_runtime_dependency 'rserve-client', '~> 0.3'
|
76
|
-
s.add_runtime_dependency 'rubyvis', '~> 0.
|
70
|
+
s.add_runtime_dependency 'rubyvis', '~> 0.6.1'
|
77
71
|
s.add_runtime_dependency 'distribution', '~> 0.7'
|
78
72
|
s.add_runtime_dependency 'awesome_print', '~> 1.6'
|
79
73
|
|
80
|
-
s.add_development_dependency 'bundler', '~> 1.
|
74
|
+
s.add_development_dependency 'bundler', '~> 1.10'
|
81
75
|
s.add_development_dependency 'rake', '~> 10.4'
|
82
76
|
s.add_development_dependency 'rdoc', '~> 4.2'
|
83
77
|
s.add_development_dependency 'shoulda', '~> 3.5'
|
84
78
|
s.add_development_dependency 'shoulda-matchers', '~> 2.2'
|
85
|
-
s.add_development_dependency 'minitest', '~> 5.
|
79
|
+
s.add_development_dependency 'minitest', '~> 5.7'
|
86
80
|
s.add_development_dependency 'gettext', '~> 3.1'
|
87
81
|
s.add_development_dependency 'mocha', '~> 1.1'
|
82
|
+
s.add_development_dependency 'nmatrix', '~> 0.1.0'
|
83
|
+
s.add_development_dependency 'gsl-nmatrix', '~> 1.17.0'
|
88
84
|
end
|
data/test/helpers_tests.rb
CHANGED
@@ -33,7 +33,7 @@ module Minitest
|
|
33
33
|
def assert_similar_vector(exp, obs, delta = 1e-10, msg = nil)
|
34
34
|
msg ||= "Different vectors #{exp} - #{obs}"
|
35
35
|
assert_equal(exp.size, obs.size)
|
36
|
-
exp.
|
36
|
+
exp.to_a.each_with_index {|v, i|
|
37
37
|
assert_in_delta(v, obs[i], delta)
|
38
38
|
}
|
39
39
|
end
|
data/test/test_analysis.rb
CHANGED
@@ -39,7 +39,7 @@ class StatsampleAnalysisTestCase < Minitest::Test
|
|
39
39
|
should 'to_text returns the same as a normal ReportBuilder object' do
|
40
40
|
rb = ReportBuilder.new(name: :test)
|
41
41
|
section = ReportBuilder::Section.new(name: 'first')
|
42
|
-
a = [1, 2, 3]
|
42
|
+
a = Daru::Vector.new([1, 2, 3])
|
43
43
|
section.add('first')
|
44
44
|
section.add(a)
|
45
45
|
rb.add(section)
|
@@ -98,8 +98,8 @@ class StatsampleAnalysisTestCase < Minitest::Test
|
|
98
98
|
end
|
99
99
|
should 'attach() allows to call objects on objects which respond to fields' do
|
100
100
|
an = Statsample::Analysis::Suite.new(:summary)
|
101
|
-
ds = {
|
102
|
-
ds.expects(:
|
101
|
+
ds = { :x => stub(mean: 10), :y => stub(mean: 12) }
|
102
|
+
ds.expects(:vectors).returns([:x, :y]).at_least_once
|
103
103
|
an.attach(ds)
|
104
104
|
assert_equal(10, an.x.mean)
|
105
105
|
assert_equal(12, an.y.mean)
|
@@ -109,10 +109,10 @@ class StatsampleAnalysisTestCase < Minitest::Test
|
|
109
109
|
end
|
110
110
|
should 'attached objects should be called LIFO' do
|
111
111
|
an = Statsample::Analysis::Suite.new(:summary)
|
112
|
-
ds1 = {
|
113
|
-
ds1.expects(:
|
114
|
-
ds2 = {
|
115
|
-
ds2.expects(:
|
112
|
+
ds1 = { :x => stub(mean: 100), :y => stub(mean: 120), :z => stub(mean: 13) }
|
113
|
+
ds1.expects(:vectors).returns([:x, :y, :z]).at_least_once
|
114
|
+
ds2 = { :x => stub(mean: 10), :y => stub(mean: 12) }
|
115
|
+
ds2.expects(:vectors).returns([:x, :y]).at_least_once
|
116
116
|
an.attach(ds1)
|
117
117
|
an.attach(ds2)
|
118
118
|
assert_equal(10, an.x.mean)
|
@@ -122,10 +122,10 @@ class StatsampleAnalysisTestCase < Minitest::Test
|
|
122
122
|
|
123
123
|
should 'detach() without arguments drop latest object' do
|
124
124
|
an = Statsample::Analysis::Suite.new(:summary)
|
125
|
-
ds1 = {
|
126
|
-
ds1.expects(:
|
127
|
-
ds2 = {
|
128
|
-
ds2.expects(:
|
125
|
+
ds1 = { :x => stub(mean: 100), :y => stub(mean: 120), :z => stub(mean: 13) }
|
126
|
+
ds1.expects(:vectors).returns([:x, :y, :z]).at_least_once
|
127
|
+
ds2 = { :x => stub(mean: 10), :y => stub(mean: 12) }
|
128
|
+
ds2.expects(:vectors).returns([:x, :y]).at_least_once
|
129
129
|
an.attach(ds1)
|
130
130
|
an.attach(ds2)
|
131
131
|
assert_equal(10, an.x.mean)
|
@@ -134,12 +134,12 @@ class StatsampleAnalysisTestCase < Minitest::Test
|
|
134
134
|
end
|
135
135
|
should 'detach() with argument drop select object' do
|
136
136
|
an = Statsample::Analysis::Suite.new(:summary)
|
137
|
-
ds1 = {
|
138
|
-
ds1.expects(:
|
139
|
-
ds2 = {
|
140
|
-
ds2.expects(:
|
141
|
-
ds3 = {
|
142
|
-
ds3.expects(:
|
137
|
+
ds1 = { :x => 1 }
|
138
|
+
ds1.expects(:vectors).returns([:x]).at_least_once
|
139
|
+
ds2 = { :x => 2, :y => 3 }
|
140
|
+
ds2.expects(:vectors).returns([:x, :y]).at_least_once
|
141
|
+
ds3 = { :y => 4 }
|
142
|
+
ds3.expects(:vectors).returns([:y]).at_least_once
|
143
143
|
|
144
144
|
an.attach(ds3)
|
145
145
|
an.attach(ds2)
|
data/test/test_anova_contrast.rb
CHANGED
@@ -2,12 +2,12 @@ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
|
2
2
|
class StatsampleAnovaContrastTestCase < Minitest::Test
|
3
3
|
context(Statsample::Anova::Contrast) do
|
4
4
|
setup do
|
5
|
-
constant
|
6
|
-
frequent
|
7
|
-
infrequent = [15, 16, 17, 16, 16]
|
8
|
-
never
|
9
|
-
@vectors
|
10
|
-
@c
|
5
|
+
constant = Daru::Vector.new([12, 13, 11, 12, 12])
|
6
|
+
frequent = Daru::Vector.new([9, 10, 9, 13, 14])
|
7
|
+
infrequent = Daru::Vector.new([15, 16, 17, 16, 16])
|
8
|
+
never = Daru::Vector.new([17, 18, 12, 18, 20])
|
9
|
+
@vectors = [constant, frequent, infrequent, never]
|
10
|
+
@c = Statsample::Anova::Contrast.new(vectors: @vectors)
|
11
11
|
end
|
12
12
|
should 'return correct value using c' do
|
13
13
|
@c.c([1, -1.quo(3), -1.quo(3), -1.quo(3)])
|
@@ -4,14 +4,14 @@ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
|
4
4
|
class StatsampleAnovaTwoWayWithVectorsTestCase < Minitest::Test
|
5
5
|
context(Statsample::Anova::TwoWayWithVectors) do
|
6
6
|
setup do
|
7
|
-
@pa = [5, 4, 3, 4, 2, 18, 19, 14, 12, 15, 6, 7, 5, 8, 4, 6, 9, 5, 9, 3]
|
8
|
-
@pa.
|
9
|
-
@a = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
|
10
|
-
@a.labels = { 0 => '0%', 1 => '35%' }
|
11
|
-
@a.
|
12
|
-
@b = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
|
13
|
-
@b.labels = { 0 => 'Young', 1 => 'Older' }
|
14
|
-
@b.
|
7
|
+
@pa = Daru::Vector.new [5, 4, 3, 4, 2, 18, 19, 14, 12, 15, 6, 7, 5, 8, 4, 6, 9, 5, 9, 3]
|
8
|
+
@pa.rename 'Passive Avoidance'
|
9
|
+
@a = Daru::Vector.new [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
|
10
|
+
# @a.labels = { 0 => '0%', 1 => '35%' }
|
11
|
+
@a.rename 'Diet'
|
12
|
+
@b = Daru::Vector.new [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
|
13
|
+
# @b.labels = { 0 => 'Young', 1 => 'Older' }
|
14
|
+
@b.rename 'Age'
|
15
15
|
@anova = Statsample::Anova::TwoWayWithVectors.new(a: @a, b: @b, dependent: @pa)
|
16
16
|
end
|
17
17
|
should 'Statsample::Anova respond to #twoway_with_vectors' do
|
@@ -3,9 +3,9 @@ class StatsampleAnovaOneWayWithVectorsTestCase < Minitest::Test
|
|
3
3
|
context(Statsample::Anova::OneWayWithVectors) do
|
4
4
|
context('when initializing') do
|
5
5
|
setup do
|
6
|
-
@v1 = 10.times.map { rand(100) }
|
7
|
-
@v2 = 10.times.map { rand(100) }
|
8
|
-
@v3 = 10.times.map { rand(100) }
|
6
|
+
@v1 = Daru::Vector.new(10.times.map { rand(100) })
|
7
|
+
@v2 = Daru::Vector.new(10.times.map { rand(100) })
|
8
|
+
@v3 = Daru::Vector.new(10.times.map { rand(100) })
|
9
9
|
end
|
10
10
|
should 'be the same using [] or args*' do
|
11
11
|
a1 = Statsample::Anova::OneWayWithVectors.new(@v1, @v2, @v3)
|
@@ -28,9 +28,9 @@ class StatsampleAnovaOneWayWithVectorsTestCase < Minitest::Test
|
|
28
28
|
end
|
29
29
|
end
|
30
30
|
setup do
|
31
|
-
@v1 = [3, 3, 2, 3, 6]
|
32
|
-
@v2 = [7, 6, 5, 6, 7]
|
33
|
-
@v3 = [9, 8, 9, 7, 8]
|
31
|
+
@v1 = Daru::Vector.new([3, 3, 2, 3, 6])
|
32
|
+
@v2 = Daru::Vector.new([7, 6, 5, 6, 7])
|
33
|
+
@v3 = Daru::Vector.new([9, 8, 9, 7, 8])
|
34
34
|
@name = 'Anova testing'
|
35
35
|
@anova = Statsample::Anova::OneWayWithVectors.new(@v1, @v2, @v3, name: @name)
|
36
36
|
end
|
@@ -66,10 +66,10 @@ class StatsampleAnovaOneWayWithVectorsTestCase < Minitest::Test
|
|
66
66
|
assert_in_delta(@anova.sst, @anova.sswg + @anova.ssbg, 0.00001)
|
67
67
|
end
|
68
68
|
should 'df total equal to number of n-1' do
|
69
|
-
assert_equal(@v1.
|
69
|
+
assert_equal(@v1.size + @v2.size + @v3.size - 1, @anova.df_total)
|
70
70
|
end
|
71
71
|
should 'df wg equal to number of n-k' do
|
72
|
-
assert_equal(@v1.
|
72
|
+
assert_equal(@v1.size + @v2.size + @v3.size - 3, @anova.df_wg)
|
73
73
|
end
|
74
74
|
should 'df bg equal to number of k-1' do
|
75
75
|
assert_equal(2, @anova.df_bg)
|
@@ -4,11 +4,11 @@ class StatsampleBartlettSphericityTestCase < Minitest::Test
|
|
4
4
|
include Statsample::Test
|
5
5
|
context Statsample::Test::BartlettSphericity do
|
6
6
|
setup do
|
7
|
-
@v1 = [1, 2, 3, 4, 7, 8, 9, 10, 14, 15, 20, 50, 60, 70]
|
8
|
-
@v2 = [5, 6, 11, 12, 13, 16, 17, 18, 19, 20, 30, 0, 0, 0]
|
9
|
-
@v3 = [10, 3, 20, 30, 40, 50, 80, 10, 20, 30, 40, 2, 3, 4]
|
7
|
+
@v1 = Daru::Vector.new([1, 2, 3, 4, 7, 8, 9, 10, 14, 15, 20, 50, 60, 70])
|
8
|
+
@v2 = Daru::Vector.new([5, 6, 11, 12, 13, 16, 17, 18, 19, 20, 30, 0, 0, 0])
|
9
|
+
@v3 = Daru::Vector.new([10, 3, 20, 30, 40, 50, 80, 10, 20, 30, 40, 2, 3, 4])
|
10
10
|
# KMO: 0.490
|
11
|
-
ds = {
|
11
|
+
ds = Daru::DataFrame.new({ :v1 => @v1, :v2 => @v2, :v3 => @v3 })
|
12
12
|
cor = Statsample::Bivariate.correlation_matrix(ds)
|
13
13
|
@bs = Statsample::Test::BartlettSphericity.new(cor, 14)
|
14
14
|
end
|
data/test/test_bivariate.rb
CHANGED
@@ -1,38 +1,38 @@
|
|
1
1
|
require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
2
2
|
class StatsampleBivariateTestCase < Minitest::Test
|
3
3
|
should 'method sum of squares should be correct' do
|
4
|
-
v1 = [1, 2, 3, 4, 5, 6]
|
5
|
-
v2 = [6, 2, 4, 10, 12, 8]
|
4
|
+
v1 = Daru::Vector.new([1, 2, 3, 4, 5, 6])
|
5
|
+
v2 = Daru::Vector.new([6, 2, 4, 10, 12, 8])
|
6
6
|
assert_equal(23.0, Statsample::Bivariate.sum_of_squares(v1, v2))
|
7
7
|
end
|
8
8
|
should_with_gsl 'return same covariance with ruby and gls implementation' do
|
9
|
-
v1 = 20.times.collect { |_a| rand }
|
10
|
-
v2 = 20.times.collect { |_a| rand }
|
9
|
+
v1 = Daru::Vector.new(20.times.collect { |_a| rand })
|
10
|
+
v2 = Daru::Vector.new(20.times.collect { |_a| rand })
|
11
11
|
assert_in_delta(Statsample::Bivariate.covariance(v1, v2), Statsample::Bivariate.covariance_slow(v1, v2), 0.001)
|
12
12
|
end
|
13
13
|
|
14
14
|
should_with_gsl 'return same correlation with ruby and gls implementation' do
|
15
|
-
v1 = 20.times.collect { |_a| rand }
|
16
|
-
v2 = 20.times.collect { |_a| rand }
|
15
|
+
v1 = Daru::Vector.new(20.times.collect { |_a| rand })
|
16
|
+
v2 = Daru::Vector.new(20.times.collect { |_a| rand })
|
17
17
|
|
18
|
-
assert_in_delta(GSL::Stats.correlation(v1.
|
18
|
+
assert_in_delta(GSL::Stats.correlation(v1.to_gsl, v2.to_gsl), Statsample::Bivariate.pearson_slow(v1, v2), 1e-10)
|
19
19
|
end
|
20
20
|
should 'return correct pearson correlation' do
|
21
|
-
v1 = [6, 5, 4, 7, 8, 4, 3, 2]
|
22
|
-
v2 = [2, 3, 7, 8, 6, 4, 3, 2]
|
21
|
+
v1 = Daru::Vector.new([6, 5, 4, 7, 8, 4, 3, 2])
|
22
|
+
v2 = Daru::Vector.new([2, 3, 7, 8, 6, 4, 3, 2])
|
23
23
|
assert_in_delta(0.525, Statsample::Bivariate.pearson(v1, v2), 0.001)
|
24
24
|
assert_in_delta(0.525, Statsample::Bivariate.pearson_slow(v1, v2), 0.001)
|
25
25
|
|
26
|
-
v3 = [6, 2, 1000, 1000, 5, 4, 7, 8, 4, 3, 2, nil]
|
27
|
-
v4 = [2, nil, nil, nil, 3, 7, 8, 6, 4, 3, 2, 500]
|
26
|
+
v3 = Daru::Vector.new([6, 2, 1000, 1000, 5, 4, 7, 8, 4, 3, 2, nil])
|
27
|
+
v4 = Daru::Vector.new([2, nil, nil, nil, 3, 7, 8, 6, 4, 3, 2, 500])
|
28
28
|
assert_in_delta(0.525, Statsample::Bivariate.pearson(v3, v4), 0.001)
|
29
29
|
# Test ruby method
|
30
30
|
v3a, v4a = Statsample.only_valid v3, v4
|
31
31
|
assert_in_delta(0.525, Statsample::Bivariate.pearson_slow(v3a, v4a), 0.001)
|
32
32
|
end
|
33
33
|
should 'return correct values for t_pearson and prop_pearson' do
|
34
|
-
v1 = [6, 5, 4, 7, 8, 4, 3, 2]
|
35
|
-
v2 = [2, 3, 7, 8, 6, 4, 3, 2]
|
34
|
+
v1 = Daru::Vector.new([6, 5, 4, 7, 8, 4, 3, 2])
|
35
|
+
v2 = Daru::Vector.new([2, 3, 7, 8, 6, 4, 3, 2])
|
36
36
|
r = Statsample::Bivariate::Pearson.new(v1, v2)
|
37
37
|
assert_in_delta(0.525, r.r, 0.001)
|
38
38
|
assert_in_delta(Statsample::Bivariate.t_pearson(v1, v2), r.t, 0.001)
|
@@ -40,11 +40,11 @@ class StatsampleBivariateTestCase < Minitest::Test
|
|
40
40
|
assert(r.summary.size > 0)
|
41
41
|
end
|
42
42
|
should 'return correct correlation_matrix with nils values' do
|
43
|
-
v1 = [6, 5, 4, 7, 8, 4, 3, 2]
|
44
|
-
v2 = [2, 3, 7, 8, 6, 4, 3, 2]
|
45
|
-
v3 = [6, 2, 1000, 1000, 5, 4, 7, 8]
|
46
|
-
v4 = [2, nil, nil, nil, 3, 7, 8, 6]
|
47
|
-
ds = {
|
43
|
+
v1 = Daru::Vector.new([6, 5, 4, 7, 8, 4, 3, 2])
|
44
|
+
v2 = Daru::Vector.new([2, 3, 7, 8, 6, 4, 3, 2])
|
45
|
+
v3 = Daru::Vector.new([6, 2, 1000, 1000, 5, 4, 7, 8])
|
46
|
+
v4 = Daru::Vector.new([2, nil, nil, nil, 3, 7, 8, 6])
|
47
|
+
ds = Daru::DataFrame.new({ :v1 => v1, :v2 => v2, :v3 => v3, :v4 => v4 })
|
48
48
|
c = proc { |n1, n2| Statsample::Bivariate.pearson(n1, n2) }
|
49
49
|
expected = Matrix[[c.call(v1, v1), c.call(v1, v2), c.call(v1, v3), c.call(v1, v4)], [c.call(v2, v1), c.call(v2, v2), c.call(v2, v3), c.call(v2, v4)], [c.call(v3, v1), c.call(v3, v2), c.call(v3, v3), c.call(v3, v4)],
|
50
50
|
[c.call(v4, v1), c.call(v4, v2), c.call(v4, v3), c.call(v4, v4)]
|
@@ -61,13 +61,13 @@ class StatsampleBivariateTestCase < Minitest::Test
|
|
61
61
|
end
|
62
62
|
should_with_gsl 'return same values for optimized and pairwise covariance matrix' do
|
63
63
|
cases = 100
|
64
|
-
v1 =
|
65
|
-
v2 =
|
66
|
-
v3 =
|
67
|
-
v4 =
|
68
|
-
v5 =
|
64
|
+
v1 = Daru::Vector.new_with_size(cases) { rand }
|
65
|
+
v2 = Daru::Vector.new_with_size(cases) { rand }
|
66
|
+
v3 = Daru::Vector.new_with_size(cases) { rand }
|
67
|
+
v4 = Daru::Vector.new_with_size(cases) { rand }
|
68
|
+
v5 = Daru::Vector.new_with_size(cases) { rand }
|
69
69
|
|
70
|
-
ds = {
|
70
|
+
ds = Daru::DataFrame.new({ :v1 => v1, :v2 => v2, :v3 => v3, :v4 => v4, :v5 => v5 })
|
71
71
|
|
72
72
|
cor_opt = Statsample::Bivariate.covariance_matrix_optimized(ds)
|
73
73
|
|
@@ -76,13 +76,14 @@ class StatsampleBivariateTestCase < Minitest::Test
|
|
76
76
|
end
|
77
77
|
should_with_gsl 'return same values for optimized and pairwise correlation matrix' do
|
78
78
|
cases = 100
|
79
|
-
v1 =
|
80
|
-
v2 =
|
81
|
-
v3 =
|
82
|
-
v4 =
|
83
|
-
v5 =
|
79
|
+
v1 = Daru::Vector.new_with_size(cases) { rand }
|
80
|
+
v2 = Daru::Vector.new_with_size(cases) { rand }
|
81
|
+
v3 = Daru::Vector.new_with_size(cases) { rand }
|
82
|
+
v4 = Daru::Vector.new_with_size(cases) { rand }
|
83
|
+
v5 = Daru::Vector.new_with_size(cases) { rand }
|
84
84
|
|
85
|
-
ds = {
|
85
|
+
ds = Daru::DataFrame.new({
|
86
|
+
:v1 => v1, :v2 => v2, :v3 => v3, :v4 => v4, :v5 => v5 })
|
86
87
|
|
87
88
|
cor_opt = Statsample::Bivariate.correlation_matrix_optimized(ds)
|
88
89
|
|
@@ -90,11 +91,11 @@ class StatsampleBivariateTestCase < Minitest::Test
|
|
90
91
|
assert_equal_matrix(cor_opt, cor_pw, 1e-15)
|
91
92
|
end
|
92
93
|
should 'return correct correlation_matrix without nils values' do
|
93
|
-
v1 = [6, 5, 4, 7, 8, 4, 3, 2]
|
94
|
-
v2 = [2, 3, 7, 8, 6, 4, 3, 2]
|
95
|
-
v3 = [6, 2, 1000, 1000, 5, 4, 7, 8]
|
96
|
-
v4 = [2, 4, 6, 7, 3, 7, 8, 6]
|
97
|
-
ds = {
|
94
|
+
v1 = Daru::Vector.new([6, 5, 4, 7, 8, 4, 3, 2])
|
95
|
+
v2 = Daru::Vector.new([2, 3, 7, 8, 6, 4, 3, 2])
|
96
|
+
v3 = Daru::Vector.new([6, 2, 1000, 1000, 5, 4, 7, 8])
|
97
|
+
v4 = Daru::Vector.new([2, 4, 6, 7, 3, 7, 8, 6])
|
98
|
+
ds = Daru::DataFrame.new({ :v1 => v1, :v2 => v2, :v3 => v3, :v4 => v4 })
|
98
99
|
c = proc { |n1, n2| Statsample::Bivariate.pearson(n1, n2) }
|
99
100
|
expected = Matrix[[c.call(v1, v1), c.call(v1, v2), c.call(v1, v3), c.call(v1, v4)], [c.call(v2, v1), c.call(v2, v2), c.call(v2, v3), c.call(v2, v4)], [c.call(v3, v1), c.call(v3, v2), c.call(v3, v3), c.call(v3, v4)],
|
100
101
|
[c.call(v4, v1), c.call(v4, v2), c.call(v4, v3), c.call(v4, v4)]
|
@@ -129,25 +130,25 @@ class StatsampleBivariateTestCase < Minitest::Test
|
|
129
130
|
end
|
130
131
|
|
131
132
|
should "return correct value for Spearman's rho" do
|
132
|
-
v1 = [86, 97, 99, 100, 101, 103, 106, 110, 112, 113]
|
133
|
-
v2 = [0, 20, 28, 27, 50, 29, 7, 17, 6, 12]
|
133
|
+
v1 =Daru::Vector.new( [86, 97, 99, 100, 101, 103, 106, 110, 112, 113])
|
134
|
+
v2 =Daru::Vector.new( [0, 20, 28, 27, 50, 29, 7, 17, 6, 12])
|
134
135
|
assert_in_delta(-0.175758, Statsample::Bivariate.spearman(v1, v2), 0.0001)
|
135
136
|
end
|
136
137
|
should 'return correct value for point_biserial correlation' do
|
137
|
-
c = [1, 3, 5, 6, 7, 100, 200, 300, 400, 300]
|
138
|
-
d = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]
|
138
|
+
c = Daru::Vector.new([1, 3, 5, 6, 7, 100, 200, 300, 400, 300])
|
139
|
+
d = Daru::Vector.new([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
|
139
140
|
assert_raises TypeError do
|
140
141
|
Statsample::Bivariate.point_biserial(c, d)
|
141
142
|
end
|
142
143
|
assert_in_delta(Statsample::Bivariate.point_biserial(d, c), Statsample::Bivariate.pearson(d, c), 0.0001)
|
143
144
|
end
|
144
145
|
should 'return correct value for tau_a and tau_b' do
|
145
|
-
v1 = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
|
146
|
-
v2 = [1, 3, 4, 5, 7, 8, 2, 9, 10, 6, 11]
|
146
|
+
v1 = Daru::Vector.new([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
|
147
|
+
v2 = Daru::Vector.new([1, 3, 4, 5, 7, 8, 2, 9, 10, 6, 11])
|
147
148
|
assert_in_delta(0.6727, Statsample::Bivariate.tau_a(v1, v2), 0.001)
|
148
149
|
assert_in_delta(0.6727, Statsample::Bivariate.tau_b((Statsample::Crosstab.new(v1, v2).to_matrix)), 0.001)
|
149
|
-
v1 = [12, 14, 14, 17, 19, 19, 19, 19, 19, 20, 21, 21, 21, 21, 21, 22, 23, 24, 24, 24, 26, 26, 27]
|
150
|
-
v2 = [11, 4, 4, 2, 0, 0, 0, 0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0]
|
150
|
+
v1 = Daru::Vector.new([12, 14, 14, 17, 19, 19, 19, 19, 19, 20, 21, 21, 21, 21, 21, 22, 23, 24, 24, 24, 26, 26, 27])
|
151
|
+
v2 = Daru::Vector.new([11, 4, 4, 2, 0, 0, 0, 0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0])
|
151
152
|
assert_in_delta(-0.376201540231705, Statsample::Bivariate.tau_b(Statsample::Crosstab.new(v1, v2).to_matrix), 0.001)
|
152
153
|
end
|
153
154
|
should 'return correct value for gamma correlation' do
|
@@ -156,4 +157,8 @@ class StatsampleBivariateTestCase < Minitest::Test
|
|
156
157
|
m2 = Matrix[[15, 12, 6, 5], [12, 8, 10, 8], [4, 6, 9, 10]]
|
157
158
|
assert_in_delta(0.349, Statsample::Bivariate.gamma(m2), 0.001)
|
158
159
|
end
|
160
|
+
|
161
|
+
should 'return correct residuals' do
|
162
|
+
# TODO: test Statsample::Bivariate.residuals
|
163
|
+
end
|
159
164
|
end
|
data/test/test_codification.rb
CHANGED
@@ -1,33 +1,33 @@
|
|
1
1
|
require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
2
2
|
class StatsampleCodificationTestCase < Minitest::Test
|
3
3
|
def initialize(*args)
|
4
|
-
v1 = %w(run walk,run walking running sleep sleeping,dreaming sleep,dream)
|
4
|
+
v1 = Daru::Vector.new(%w(run walk,run walking running sleep sleeping,dreaming sleep,dream))
|
5
5
|
@dict = { 'run' => 'r', 'walk' => 'w', 'walking' => 'w', 'running' => 'r', 'sleep' => 's', 'sleeping' => 's', 'dream' => 'd', 'dreaming' => 'd' }
|
6
|
-
@ds = {
|
6
|
+
@ds = Daru::DataFrame.new({ :v1 => v1 })
|
7
7
|
super
|
8
8
|
end
|
9
9
|
|
10
10
|
def test_create_hash
|
11
11
|
expected_keys_v1 = %w(run walk walking running sleep sleeping dream dreaming).sort
|
12
|
-
hash = Statsample::Codification.create_hash(@ds, [
|
13
|
-
assert_equal([
|
14
|
-
assert_equal(expected_keys_v1, hash[
|
15
|
-
assert_equal(expected_keys_v1, hash[
|
12
|
+
hash = Statsample::Codification.create_hash(@ds, [:v1])
|
13
|
+
assert_equal([:v1], hash.keys)
|
14
|
+
assert_equal(expected_keys_v1, hash[:v1].keys.sort)
|
15
|
+
assert_equal(expected_keys_v1, hash[:v1].values.sort)
|
16
16
|
end
|
17
17
|
|
18
18
|
def test_create_excel
|
19
19
|
filename = Dir.tmpdir + '/test_excel' + Time.now.to_s + '.xls'
|
20
20
|
# filename = Tempfile.new("test_codification_"+Time.now().to_s)
|
21
21
|
Statsample::Codification.create_excel(@ds, ['v1'], filename)
|
22
|
-
field = (['v1'] * 8)
|
23
|
-
keys = %w(dream dreaming run running sleep sleeping walk walking)
|
24
|
-
ds =
|
25
|
-
assert_equal(field, ds[
|
26
|
-
assert_equal(keys, ds[
|
27
|
-
assert_equal(keys, ds[
|
22
|
+
field = Daru::Vector.new(['v1'] * 8, name: :field)
|
23
|
+
keys = Daru::Vector.new(%w(dream dreaming run running sleep sleeping walk walking))
|
24
|
+
ds = Daru::DataFrame.from_excel(filename)
|
25
|
+
assert_equal(field, ds[:field])
|
26
|
+
assert_equal(keys, ds[:original])
|
27
|
+
assert_equal(keys, ds[:recoded])
|
28
28
|
hash = Statsample::Codification.excel_to_recoded_hash(filename)
|
29
|
-
assert_equal(keys.
|
30
|
-
assert_equal(keys.
|
29
|
+
assert_equal(keys.to_a, hash[:v1].keys.sort)
|
30
|
+
assert_equal(keys.to_a, hash[:v1].values.sort)
|
31
31
|
end
|
32
32
|
|
33
33
|
def test_create_yaml
|
@@ -35,44 +35,44 @@ class StatsampleCodificationTestCase < Minitest::Test
|
|
35
35
|
Statsample::Codification.create_yaml(@ds, [])
|
36
36
|
end
|
37
37
|
expected_keys_v1 = %w(run walk walking running sleep sleeping dream dreaming).sort
|
38
|
-
yaml_hash = Statsample::Codification.create_yaml(@ds, [
|
38
|
+
yaml_hash = Statsample::Codification.create_yaml(@ds, [:v1])
|
39
39
|
h = YAML.load(yaml_hash)
|
40
|
-
assert_equal([
|
41
|
-
assert_equal(expected_keys_v1, h[
|
40
|
+
assert_equal([:v1], h.keys)
|
41
|
+
assert_equal(expected_keys_v1, h[:v1].keys.sort)
|
42
42
|
tf = Tempfile.new('test_codification')
|
43
|
-
yaml_hash = Statsample::Codification.create_yaml(@ds, [
|
43
|
+
yaml_hash = Statsample::Codification.create_yaml(@ds, [:v1], tf, Statsample::SPLIT_TOKEN)
|
44
44
|
tf.close
|
45
45
|
tf.open
|
46
46
|
h = YAML.load(tf)
|
47
|
-
assert_equal([
|
48
|
-
assert_equal(expected_keys_v1, h[
|
47
|
+
assert_equal([:v1], h.keys)
|
48
|
+
assert_equal(expected_keys_v1, h[:v1].keys.sort)
|
49
49
|
tf.close(true)
|
50
50
|
end
|
51
51
|
|
52
52
|
def test_recodification
|
53
53
|
expected = [['r'], %w(w r), ['w'], ['r'], ['s'], %w(s d), %w(s d)]
|
54
|
-
assert_equal(expected, Statsample::Codification.recode_vector(@ds[
|
55
|
-
v2 = ['run', 'walk,dreaming', nil, 'walk,dream,dreaming,walking']
|
54
|
+
assert_equal(expected, Statsample::Codification.recode_vector(@ds[:v1], @dict))
|
55
|
+
v2 = Daru::Vector.new(['run', 'walk,dreaming', nil, 'walk,dream,dreaming,walking'])
|
56
56
|
expected = [['r'], %w(w d), nil, %w(w d)]
|
57
57
|
assert_equal(expected, Statsample::Codification.recode_vector(v2, @dict))
|
58
58
|
end
|
59
59
|
|
60
60
|
def test_recode_dataset_simple
|
61
|
-
Statsample::Codification.recode_dataset_simple!(@ds,
|
62
|
-
expected_vector = ['r', 'w,r', 'w', 'r', 's', 's,d', 's,d']
|
63
|
-
assert_not_equal(expected_vector, @ds[
|
64
|
-
assert_equal(expected_vector, @ds[
|
61
|
+
Statsample::Codification.recode_dataset_simple!(@ds, :v1 => @dict)
|
62
|
+
expected_vector = Daru::Vector.new(['r', 'w,r', 'w', 'r', 's', 's,d', 's,d'])
|
63
|
+
assert_not_equal(expected_vector, @ds[:v1])
|
64
|
+
assert_equal(expected_vector, @ds[:v1_recoded])
|
65
65
|
end
|
66
66
|
|
67
67
|
def test_recode_dataset_split
|
68
|
-
Statsample::Codification.recode_dataset_split!(@ds,
|
68
|
+
Statsample::Codification.recode_dataset_split!(@ds, :v1 => @dict)
|
69
69
|
e = {}
|
70
|
-
e['r'] = [1, 1, 0, 1, 0, 0, 0]
|
71
|
-
e['w'] = [0, 1, 1, 0, 0, 0, 0]
|
72
|
-
e['s'] = [0, 0, 0, 0, 1, 1, 1]
|
73
|
-
e['d'] = [0, 0, 0, 0, 0, 1, 1]
|
74
|
-
e.each{|k, expected|
|
75
|
-
assert_equal(expected, @ds['v1_' + k], "Error on key #{k}")
|
70
|
+
e['r'] = Daru::Vector.new([1, 1, 0, 1, 0, 0, 0])
|
71
|
+
e['w'] = Daru::Vector.new([0, 1, 1, 0, 0, 0, 0])
|
72
|
+
e['s'] = Daru::Vector.new([0, 0, 0, 0, 1, 1, 1])
|
73
|
+
e['d'] = Daru::Vector.new([0, 0, 0, 0, 0, 1, 1])
|
74
|
+
e.each { |k, expected|
|
75
|
+
assert_equal(expected, @ds[('v1_' + k).to_sym], "Error on key #{k}")
|
76
76
|
}
|
77
77
|
end
|
78
78
|
end
|