statsample-ekatena 2.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.travis.yml +23 -0
- data/CONTRIBUTING.md +17 -0
- data/Gemfile +2 -0
- data/History.txt +457 -0
- data/LICENSE.txt +12 -0
- data/README.md +175 -0
- data/Rakefile +44 -0
- data/benchmarks/correlation_matrix_15_variables.rb +32 -0
- data/benchmarks/correlation_matrix_5_variables.rb +33 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +71 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
- data/benchmarks/correlation_matrix_methods/results.ds +0 -0
- data/benchmarks/factor_map.rb +37 -0
- data/benchmarks/helpers_benchmark.rb +5 -0
- data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
- data/doc_latex/manual/equations.tex +78 -0
- data/examples/boxplot.rb +28 -0
- data/examples/chisquare_test.rb +23 -0
- data/examples/correlation_matrix.rb +32 -0
- data/examples/dataset.rb +30 -0
- data/examples/dominance_analysis.rb +33 -0
- data/examples/dominance_analysis_bootstrap.rb +32 -0
- data/examples/histogram.rb +26 -0
- data/examples/icc.rb +24 -0
- data/examples/levene.rb +29 -0
- data/examples/multiple_regression.rb +20 -0
- data/examples/multivariate_correlation.rb +33 -0
- data/examples/parallel_analysis.rb +40 -0
- data/examples/polychoric.rb +40 -0
- data/examples/principal_axis.rb +26 -0
- data/examples/reliability.rb +31 -0
- data/examples/scatterplot.rb +25 -0
- data/examples/t_test.rb +27 -0
- data/examples/tetrachoric.rb +17 -0
- data/examples/u_test.rb +24 -0
- data/examples/vector.rb +20 -0
- data/examples/velicer_map_test.rb +46 -0
- data/grab_references.rb +29 -0
- data/lib/spss.rb +134 -0
- data/lib/statsample-ekatena/analysis.rb +100 -0
- data/lib/statsample-ekatena/analysis/suite.rb +89 -0
- data/lib/statsample-ekatena/analysis/suitereportbuilder.rb +44 -0
- data/lib/statsample-ekatena/anova.rb +24 -0
- data/lib/statsample-ekatena/anova/contrast.rb +79 -0
- data/lib/statsample-ekatena/anova/oneway.rb +187 -0
- data/lib/statsample-ekatena/anova/twoway.rb +207 -0
- data/lib/statsample-ekatena/bivariate.rb +406 -0
- data/lib/statsample-ekatena/bivariate/pearson.rb +54 -0
- data/lib/statsample-ekatena/codification.rb +182 -0
- data/lib/statsample-ekatena/converter/csv.rb +28 -0
- data/lib/statsample-ekatena/converter/spss.rb +48 -0
- data/lib/statsample-ekatena/converters.rb +211 -0
- data/lib/statsample-ekatena/crosstab.rb +188 -0
- data/lib/statsample-ekatena/daru.rb +115 -0
- data/lib/statsample-ekatena/dataset.rb +10 -0
- data/lib/statsample-ekatena/dominanceanalysis.rb +425 -0
- data/lib/statsample-ekatena/dominanceanalysis/bootstrap.rb +232 -0
- data/lib/statsample-ekatena/factor.rb +104 -0
- data/lib/statsample-ekatena/factor/map.rb +124 -0
- data/lib/statsample-ekatena/factor/parallelanalysis.rb +166 -0
- data/lib/statsample-ekatena/factor/pca.rb +242 -0
- data/lib/statsample-ekatena/factor/principalaxis.rb +243 -0
- data/lib/statsample-ekatena/factor/rotation.rb +198 -0
- data/lib/statsample-ekatena/formula/fit_model.rb +46 -0
- data/lib/statsample-ekatena/formula/formula.rb +306 -0
- data/lib/statsample-ekatena/graph.rb +11 -0
- data/lib/statsample-ekatena/graph/boxplot.rb +236 -0
- data/lib/statsample-ekatena/graph/histogram.rb +198 -0
- data/lib/statsample-ekatena/graph/scatterplot.rb +213 -0
- data/lib/statsample-ekatena/histogram.rb +180 -0
- data/lib/statsample-ekatena/matrix.rb +329 -0
- data/lib/statsample-ekatena/multiset.rb +310 -0
- data/lib/statsample-ekatena/regression.rb +65 -0
- data/lib/statsample-ekatena/regression/multiple.rb +89 -0
- data/lib/statsample-ekatena/regression/multiple/alglibengine.rb +128 -0
- data/lib/statsample-ekatena/regression/multiple/baseengine.rb +251 -0
- data/lib/statsample-ekatena/regression/multiple/gslengine.rb +129 -0
- data/lib/statsample-ekatena/regression/multiple/matrixengine.rb +205 -0
- data/lib/statsample-ekatena/regression/multiple/rubyengine.rb +86 -0
- data/lib/statsample-ekatena/regression/simple.rb +121 -0
- data/lib/statsample-ekatena/reliability.rb +150 -0
- data/lib/statsample-ekatena/reliability/icc.rb +415 -0
- data/lib/statsample-ekatena/reliability/multiscaleanalysis.rb +181 -0
- data/lib/statsample-ekatena/reliability/scaleanalysis.rb +233 -0
- data/lib/statsample-ekatena/reliability/skillscaleanalysis.rb +114 -0
- data/lib/statsample-ekatena/resample.rb +15 -0
- data/lib/statsample-ekatena/shorthand.rb +125 -0
- data/lib/statsample-ekatena/srs.rb +169 -0
- data/lib/statsample-ekatena/test.rb +82 -0
- data/lib/statsample-ekatena/test/bartlettsphericity.rb +45 -0
- data/lib/statsample-ekatena/test/chisquare.rb +73 -0
- data/lib/statsample-ekatena/test/f.rb +52 -0
- data/lib/statsample-ekatena/test/kolmogorovsmirnov.rb +63 -0
- data/lib/statsample-ekatena/test/levene.rb +88 -0
- data/lib/statsample-ekatena/test/t.rb +309 -0
- data/lib/statsample-ekatena/test/umannwhitney.rb +208 -0
- data/lib/statsample-ekatena/test/wilcoxonsignedrank.rb +90 -0
- data/lib/statsample-ekatena/vector.rb +19 -0
- data/lib/statsample-ekatena/version.rb +3 -0
- data/lib/statsample.rb +282 -0
- data/po/es/statsample.mo +0 -0
- data/po/es/statsample.po +959 -0
- data/po/statsample.pot +947 -0
- data/references.txt +24 -0
- data/statsample-ekatena.gemspec +49 -0
- data/test/fixtures/bank2.dat +200 -0
- data/test/fixtures/correlation_matrix.rb +17 -0
- data/test/fixtures/df.csv +15 -0
- data/test/fixtures/hartman_23.matrix +9 -0
- data/test/fixtures/stock_data.csv +500 -0
- data/test/fixtures/tetmat_matrix.txt +5 -0
- data/test/fixtures/tetmat_test.txt +1001 -0
- data/test/helpers_tests.rb +83 -0
- data/test/test_analysis.rb +176 -0
- data/test/test_anova_contrast.rb +36 -0
- data/test/test_anovaoneway.rb +26 -0
- data/test/test_anovatwoway.rb +37 -0
- data/test/test_anovatwowaywithdataset.rb +47 -0
- data/test/test_anovawithvectors.rb +102 -0
- data/test/test_awesome_print_bug.rb +16 -0
- data/test/test_bartlettsphericity.rb +25 -0
- data/test/test_bivariate.rb +164 -0
- data/test/test_codification.rb +78 -0
- data/test/test_crosstab.rb +67 -0
- data/test/test_dominance_analysis.rb +39 -0
- data/test/test_factor.rb +228 -0
- data/test/test_factor_map.rb +38 -0
- data/test/test_factor_pa.rb +56 -0
- data/test/test_fit_model.rb +88 -0
- data/test/test_ggobi.rb +35 -0
- data/test/test_gsl.rb +15 -0
- data/test/test_histogram.rb +109 -0
- data/test/test_matrix.rb +48 -0
- data/test/test_multiset.rb +176 -0
- data/test/test_regression.rb +231 -0
- data/test/test_reliability.rb +223 -0
- data/test/test_reliability_icc.rb +198 -0
- data/test/test_reliability_skillscale.rb +57 -0
- data/test/test_resample.rb +24 -0
- data/test/test_srs.rb +9 -0
- data/test/test_statistics.rb +69 -0
- data/test/test_stest.rb +69 -0
- data/test/test_stratified.rb +17 -0
- data/test/test_test_f.rb +33 -0
- data/test/test_test_kolmogorovsmirnov.rb +34 -0
- data/test/test_test_t.rb +62 -0
- data/test/test_umannwhitney.rb +27 -0
- data/test/test_vector.rb +12 -0
- data/test/test_wilcoxonsignedrank.rb +64 -0
- metadata +570 -0
@@ -0,0 +1,38 @@
|
|
1
|
+
require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
2
|
+
# require 'rserve'
|
3
|
+
# require 'statsample/rserve_extension'
|
4
|
+
|
5
|
+
# Exercises Statsample::Factor::MAP with a fixed 8x8 input matrix and checks
# the results against hard-coded reference values, once with use_gsl disabled
# and once with it enabled.
# NOTE(review): "Mpa" in the class name looks like a typo for "Map"; the name
# is left unchanged so nothing that refers to it breaks.
class StatsampleFactorMpaTestCase < Minitest::Test
  context Statsample::Factor::MAP do
    setup do
      # Symmetric matrix with a unit diagonal (presumably a correlation
      # matrix -- confirm against the source of the reference values).
      m = Matrix[
        [1, 0.846, 0.805, 0.859, 0.473, 0.398, 0.301, 0.382],
        [0.846, 1, 0.881, 0.826, 0.376, 0.326, 0.277, 0.415],
        [0.805, 0.881, 1, 0.801, 0.38, 0.319, 0.237, 0.345],
        [0.859, 0.826, 0.801, 1, 0.436, 0.329, 0.327, 0.365],
        [0.473, 0.376, 0.38, 0.436, 1, 0.762, 0.73, 0.629],
        [0.398, 0.326, 0.319, 0.329, 0.762, 1, 0.583, 0.577],
        [0.301, 0.277, 0.237, 0.327, 0.73, 0.583, 1, 0.539],
        [0.382, 0.415, 0.345, 0.365, 0.629, 0.577, 0.539, 1]
      ]
      @map = Statsample::Factor::MAP.new(m)
    end

    should 'return correct values with pure ruby' do
      @map.use_gsl = false
      map_assertions(@map)
    end

    # should_with_gsl is a helper defined outside this file; presumably it
    # only runs the block when GSL is available -- confirm in the helpers.
    should_with_gsl 'return correct values with gsl' do
      @map.use_gsl = true
      map_assertions(@map)
    end
  end

  # Shared assertions for both engines.
  #
  # Minitest expects (expected, actual[, delta]); keeping that order makes the
  # failure messages label the reference value and the computed value
  # correctly.
  def map_assertions(map)
    assert_in_delta(0.066445, map.minfm, 0.00001)
    assert_equal(2, map.number_of_factors)
    assert_in_delta(0.312475, map.fm[0], 0.00001)
    assert_in_delta(0.245121, map.fm[1], 0.00001)
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
2
|
+
# require 'rserve'
|
3
|
+
# require 'statsample/rserve_extension'
|
4
|
+
|
5
|
+
# Tests for Statsample::Factor::ParallelAnalysis.
class StatsampleFactorTestCase < Minitest::Test
  include Statsample::Fixtures

  # Based on Hardle and Simar
  def setup
    @fixtures_dir = File.expand_path(File.dirname(__FILE__) + '/fixtures')
  end

  # Compares the mean of the first three eigenvalue vectors obtained by
  # bootstrapping a generated data frame with those obtained from
  # with_random_data. Skipped when GSL is not available.
  def test_parallelanalysis_with_data
    skip('Too slow without GSL') unless Statsample.has_gsl?

    samples = 100
    variables = 10
    iterations = 50
    rng = Distribution::Normal.rng
    f1 = Daru::Vector.new(Array.new(samples) { rng.call })
    f2 = Daru::Vector.new(Array.new(samples) { rng.call })

    vectors = {}
    variables.times do |i|
      # The first five variables weight f1 by 5 and f2 by 2; the remaining
      # ones swap the two weights.
      heavy, light = i < 5 ? [f1, f2] : [f2, f1]
      vectors[:"v#{i}"] = Daru::Vector.new(
        Array.new(samples) { |nv| heavy[nv] * 5 + light[nv] * 2 + rng.call }
      )
    end
    ds = Daru::DataFrame.new(vectors)

    pa1 = Statsample::Factor::ParallelAnalysis.new(ds, bootstrap_method: :data, iterations: iterations)
    pa2 = Statsample::Factor::ParallelAnalysis.with_random_data(samples, variables, iterations: iterations, percentil: 95)
    1.upto(3) do |n|
      var = :"ev_0000#{n}"
      assert_in_delta(pa1.ds_eigenvalues[var].mean, pa2.ds_eigenvalues[var].mean, 0.07)
    end
  end

  # Checks the mean of the first three eigenvalue vectors from random data
  # (305 cases, 8 variables) against fixed reference values.
  def test_parallelanalysis
    pa = Statsample::Factor::ParallelAnalysis.with_random_data(305, 8, iterations: 100, percentil: 95)
    eigenvalues = pa.ds_eigenvalues
    assert_in_delta(1.2454, eigenvalues[:ev_00001].mean, 0.05)
    assert_in_delta(1.1542, eigenvalues[:ev_00002].mean, 0.01)
    assert_in_delta(1.0836, eigenvalues[:ev_00003].mean, 0.01)
    assert(pa.summary.size > 0)
  end
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
2
|
+
require 'minitest/autorun'
|
3
|
+
|
4
|
+
# Specs for Statsample::FitModel. Every example passes a formula and a list of
# vector names to assert_vectors_from_formula, a helper defined outside this
# file (presumably it checks the vectors of the data frame built for
# regression -- confirm in the helpers).
describe Statsample::FitModel do
  before do
    # Resolve the fixture relative to this file so the spec does not depend on
    # the directory the suite is started from.
    @df = Daru::DataFrame.from_csv File.expand_path(File.dirname(__FILE__) + '/fixtures/df.csv')
    @df.to_category 'c', 'd', 'e'
  end

  context '#df_for_regression' do
    context 'no interaction' do
      it { assert_vectors_from_formula 'y~a+e', %w[a e_B e_C y] }
    end

    context '2-way interaction' do
      context 'interaction of numerical with numerical' do
        context 'none reoccur' do
          it { assert_vectors_from_formula 'y~a:b', %w[a:b y] }
        end

        context 'one reoccur' do
          it { assert_vectors_from_formula 'y~a+a:b', %w[a a:b y] }
        end

        context 'both reoccur' do
          it { assert_vectors_from_formula 'y~a+b+a:b', %w[a a:b b y] }
        end
      end

      context 'interaction of category with numerical' do
        context 'none reoccur' do
          it { assert_vectors_from_formula 'y~a:e', %w[e_A:a e_B:a e_C:a y] }
        end

        context 'one reoccur' do
          context 'numeric occur' do
            it { assert_vectors_from_formula 'y~a+a:e', %w[a e_B:a e_C:a y] }
          end

          context 'category occur' do
            it do
              assert_vectors_from_formula 'y~e+a:e',
                                          %w[e_B e_C e_A:a e_B:a e_C:a y]
            end
          end
        end

        context 'both reoccur' do
          it do
            assert_vectors_from_formula 'y~a+e+a:e',
                                        %w[a e_B e_C e_B:a e_C:a y]
          end
        end
      end

      context 'interaction of category with category' do
        context 'none reoccur' do
          it do
            assert_vectors_from_formula 'y~c:e',
                                        %w[e_B e_C c_yes:e_A c_yes:e_B c_yes:e_C y]
          end
        end

        context 'one reoccur' do
          it do
            assert_vectors_from_formula 'y~e+c:e',
                                        %w[e_B e_C c_yes:e_A c_yes:e_B c_yes:e_C y]
          end
        end

        context 'both reoccur' do
          it do
            assert_vectors_from_formula 'y~c+e+c:e',
                                        %w[c_yes e_B e_C c_yes:e_B c_yes:e_C y]
          end
        end
      end
    end

    context 'corner case' do
      context 'example 1' do
        it do
          assert_vectors_from_formula 'y~d:a+d:e',
                                      %w[e_B e_C d_male:e_A d_male:e_B d_male:e_C d_female:a d_male:a y]
        end
      end
    end

    context 'complex examples' do
      context 'random example 1' do
        it do
          assert_vectors_from_formula 'y~a+e+c:d+e:d',
                                      %w[e_B e_C d_male c_yes:d_female c_yes:d_male e_B:d_male e_C:d_male a y]
        end
      end

      context 'random example 2' do
        it do
          assert_vectors_from_formula 'y~e+b+c+d:e+b:e+a:e+0',
                                      %w[e_A e_B e_C c_yes d_male:e_A d_male:e_B d_male:e_C b e_B:b e_C:b e_A:a e_B:a e_C:a y]
        end
      end
    end
  end
end
|
data/test/test_ggobi.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
2
|
+
require 'ostruct'
|
3
|
+
# Tests for the Statsample::GGobi helpers values_definition and
# variable_definition.
class StatsampleGGobiTestCase < Minitest::Test
  def setup
    numeric = Daru::Vector.new([10.2, 20.3, 10, 20, 30, 40, 30, 20, 30, 40] * 10)
    @v2 = Daru::Vector.new(%w[a b c a a a b b c d] * 10)
    @v2.labels = { 'a' => 'letter a', 'd' => 'letter d' }
    ordinal = Daru::Vector.new([1, 2, 3, 4, 5, 4, 3, 2, 1, 2] * 10)
    @ds = Daru::DataFrame.new({ v1: numeric, v2: @v2, v3: ordinal })
  end

  # nil is rendered with the supplied placeholder; values are space-separated.
  def test_values_definition
    values = [1.0, 2, 'a', nil]
    assert_equal('1.0 2 a NA', Statsample::GGobi.values_definition(values, 'NA'))
  end

  # Checks the generated markup for a labelled vector and the bookkeeping
  # written to the carrier (conversions and categorials).
  def test_variable_definition
    carrier = OpenStruct.new
    carrier.categorials = []
    carrier.conversions = {}
    actual = Statsample::GGobi.variable_definition(carrier, @v2, 'variable 2', 'v2')
    # The comparison below maps each whitespace character to one space without
    # collapsing runs, so the amount of whitespace in this heredoc matters.
    # NOTE(review): the heredoc lines are kept exactly as they appear in this
    # view; confirm their leading indentation against the generator output.
    expected = <<-EOS
<categoricalvariable name="variable 2" nickname="v2">
<levels count="4">
<level value="1">letter a</level>
<level value="2">b</level>
<level value="3">c</level>
<level value="4">letter d</level></levels>
</categoricalvariable>
EOS
    assert_equal(expected.gsub(/\s/, ' '), actual.gsub(/\s/, ' '))
    assert_equal({ 'variable 2' => { 'a' => 1, 'b' => 2, 'c' => 3, 'd' => 4 } }, carrier.conversions)
    assert_equal(['variable 2'], carrier.categorials)
  end
end
|
data/test/test_gsl.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
2
|
+
# Round-trips a three-column, five-row data frame through to_matrix / to_gsl
# and back, checking the dimensions at each step.
class StatsampleGSLTestCase < Minitest::Test
  should_with_gsl 'matrix with gsl' do
    columns = {
      a: Daru::Vector.new([1, 2, 3, 4, 20]),
      b: Daru::Vector.new([3, 2, 3, 4, 50]),
      c: Daru::Vector.new([6, 2, 3, 4, 3])
    }
    ds = Daru::DataFrame.new(columns)

    gsl = ds.to_matrix.to_gsl
    assert_equal(5, gsl.size1)
    assert_equal(3, gsl.size2)

    matrix = gsl.to_matrix
    assert_equal(5, matrix.row_size)
    assert_equal(3, matrix.column_size)
  end
end
|
@@ -0,0 +1,109 @@
|
|
1
|
+
require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
2
|
+
|
3
|
+
# Tests for Statsample::Histogram (allocation, incrementing, ranges and
# summary statistics) plus one smoke test for Statsample::Graph::Histogram.
class StatsampleHistogramTestCase < Minitest::Test
  context Statsample::Histogram do
    should 'alloc correctly with integer' do
      hist = Statsample::Histogram.alloc(4)
      assert_equal(Array.new(4, 0.0), hist.bin)
      assert_equal(Array.new(5, 0.0), hist.range)
    end

    should 'alloc correctly with array' do
      hist = Statsample::Histogram.alloc([1, 3, 7, 9, 20])
      assert_equal(Array.new(4, 0.0), hist.bin)
      assert_equal([1, 3, 7, 9, 20], hist.range)
    end

    should 'alloc correctly with integer and min, max array' do
      hist = Statsample::Histogram.alloc(5, [0, 5])
      assert_equal([0.0, 1.0, 2.0, 3.0, 4.0, 5.0], hist.range)
      assert_equal(Array.new(5, 0.0), hist.bin)
    end

    should 'bin() method return correct number of bins' do
      hist = Statsample::Histogram.alloc(4)
      assert_equal(4, hist.bins)
    end

    should 'increment correctly' do
      hist = Statsample::Histogram.alloc(5, [0, 5])
      hist.increment 2.5
      assert_equal([0.0, 0.0, 1.0, 0.0, 0.0], hist.bin)
      hist.increment [0.5, 0.5, 3.5, 3.5]
      assert_equal([2.0, 0.0, 1.0, 2.0, 0.0], hist.bin)
      # The lower bound lands in the first bin ...
      hist.increment 0
      assert_equal([3.0, 0.0, 1.0, 2.0, 0.0], hist.bin)
      # ... while the upper bound leaves every bin unchanged.
      hist.increment 5
      assert_equal([3.0, 0.0, 1.0, 2.0, 0.0], hist.bin)
    end

    should 'alloc_uniform correctly with n, min,max' do
      hist = Statsample::Histogram.alloc_uniform(5, 0, 10)
      assert_equal(5, hist.bins)
      assert_equal(Array.new(5, 0.0), hist.bin)
      assert_equal([0.0, 2.0, 4.0, 6.0, 8.0, 10.0], hist.range)
    end

    should 'alloc_uniform correctly with n, [min,max]' do
      hist = Statsample::Histogram.alloc_uniform(5, [0, 10])
      assert_equal(5, hist.bins)
      assert_equal(Array.new(5, 0.0), hist.bin)
      assert_equal([0.0, 2.0, 4.0, 6.0, 8.0, 10.0], hist.range)
    end

    should 'get_range()' do
      hist = Statsample::Histogram.alloc_uniform(5, 2, 12)
      (0...5).each do |i|
        lower = 2 + i * 2
        assert_equal([lower, lower + 2], hist.get_range(i))
      end
    end

    should 'min() and max()' do
      hist = Statsample::Histogram.alloc_uniform(5, 2, 12)
      assert_equal(2, hist.min)
      assert_equal(12, hist.max)
    end

    should 'max_val()' do
      hist = Statsample::Histogram.alloc(5, [0, 5])
      100.times { hist.increment(rand * 5) }
      # Largest count among the five bins, computed independently of max_val.
      largest = (0..4).map { |i| hist.bin[i] }.max
      assert_equal(largest, hist.max_val)
    end

    should 'min_val()' do
      hist = Statsample::Histogram.alloc(5, [0, 5])
      100.times { hist.increment(rand * 5) }
      # Smallest count among the five bins, computed independently of min_val.
      smallest = (0..4).map { |i| hist.bin[i] }.min
      assert_equal(smallest, hist.min_val)
    end

    should 'return correct estimated mean' do
      data = Daru::Vector.new([1.5, 1.5, 1.5, 3.5, 3.5, 3.5])
      hist = Statsample::Histogram.alloc(5, [0, 5])
      hist.increment(data)
      assert_equal(2.5, hist.estimated_mean)
    end

    should 'return correct estimated standard deviation' do
      data = Daru::Vector.new([0.5, 1.5, 1.5, 1.5, 2.5, 3.5, 3.5, 3.5, 4.5])
      hist = Statsample::Histogram.alloc(5, [0, 5])
      hist.increment(data)
      assert_equal(data.sd, hist.estimated_standard_deviation)
    end

    should 'return correct sum for all values' do
      hist = Statsample::Histogram.alloc(5, [0, 5])
      count = rand(100)
      count.times { hist.increment(1) }
      assert_equal(count, hist.sum)
    end

    should 'return correct sum for a subset of values' do
      hist = Statsample::Histogram.alloc(5, [0, 5])
      hist.increment([0.5, 2.5, 4.5])
      assert_equal(1, hist.sum(0, 1))
      assert_equal(2, hist.sum(1, 4))
    end

    should 'not raise exception when all values equal' do
      assert_nothing_raised do
        constant = Daru::Vector.new([5, 5, 5, 5, 5, 5])
        graph = Statsample::Graph::Histogram.new(constant)
        graph.to_svg
      end
    end
  end
end
|
data/test/test_matrix.rb
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
2
|
+
|
3
|
+
# Tests for the Statsample::NamedMatrix and Statsample::CovariateMatrix
# extensions of Matrix.
class StatsampleMatrixTestCase < Minitest::Test
  # A named matrix converted with to_dataframe should yield the same columns
  # as a data frame built directly from the column vectors.
  def test_to_dataset
    m = Matrix[[1, 4], [2, 5], [3, 6]]
    m.extend Statsample::NamedMatrix
    m.fields_y = [:x1, :x2]
    m.name = 'test'

    x1 = Daru::Vector.new([1, 2, 3])
    x2 = Daru::Vector.new([4, 5, 6])
    ds = Daru::DataFrame.new({ :x1 => x1, :x2 => x2 })
    ds.rename 'test'

    obs = m.to_dataframe
    assert_equal(ds[:x1], obs[:x1])
    assert_equal(ds[:x2], obs[:x2])
    assert_equal(ds[:x1].mean, obs[:x1].mean)
  end

  # Checks _type detection, submatrix extraction, and that the correlation
  # derived from a covariance matrix matches correlation_matrix.
  def test_covariate
    correlation = Matrix[[1.0, 0.3, 0.2], [0.3, 1.0, 0.5], [0.2, 0.5, 1.0]]
    correlation.extend Statsample::CovariateMatrix
    correlation.fields = %w(a b c)
    assert_equal(:correlation, correlation._type)

    assert_equal(Matrix[[0.5], [0.3]], correlation.submatrix(%w(c a), %w(b)))
    assert_equal(Matrix[[1.0, 0.2], [0.2, 1.0]], correlation.submatrix(%w(c a)))
    assert_equal(:correlation, correlation.submatrix(%w(c a))._type)

    covariance = Matrix[[20, 30, 10], [30, 60, 50], [10, 50, 50]]
    covariance.extend Statsample::CovariateMatrix
    assert_equal(:covariance, covariance._type)

    a = Daru::Vector.new(Array.new(50) { rand })
    b = Daru::Vector.new(Array.new(50) { rand })
    c = Daru::Vector.new(Array.new(50) { rand })
    ds = Daru::DataFrame.new({ :a => a, :b => b, :c => c })
    corr = Statsample::Bivariate.correlation_matrix(ds)
    real = Statsample::Bivariate.covariance_matrix(ds).correlation
    corr.row_size.times do |i|
      corr.column_size.times do |j|
        assert_in_delta(corr[i, j], real[i, j], 1e-15)
      end
    end
  end
end
|
@@ -0,0 +1,176 @@
|
|
1
|
+
require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
|
2
|
+
|
3
|
+
# Tests for Statsample::Multiset, DataFrame#to_multiset_by_split and
# Statsample::StratifiedSample.
class StatsampleMultisetTestCase < Minitest::Test
  # Builds an eight-row data frame and splits it on :x into @ms.
  def setup
    @x = Daru::Vector.new(%w(a a a a b b b b))
    @y = Daru::Vector.new([1, 2, 3, 4, 5, 6, 7, 8])
    @z = Daru::Vector.new([10, 11, 12, 13, 14, 15, 16, 17])
    @ds = Daru::DataFrame.new({ :x => @x, :y => @y, :z => @z })
    @ms = @ds.to_multiset_by_split(:x)
  end

  # Datasets added under a name can be read back; a dataset whose fields do
  # not match the multiset's is expected to be rejected.
  def test_creation
    v1a = Daru::Vector.new([1, 2, 3, 4, 5])
    v2a = Daru::Vector.new([11, 21, 31, 41, 51])
    v3a = Daru::Vector.new([21, 23, 34, 45, 56])
    ds1 = Daru::DataFrame.new({ :v1 => v1a, :v2 => v2a, :v3 => v3a })
    v1b = Daru::Vector.new([15, 25, 35, 45, 55])
    v2b = Daru::Vector.new([11, 21, 31, 41, 51])
    v3b = Daru::Vector.new([21, 23, 34, 45, 56])
    ds2 = Daru::DataFrame.new({ :v1 => v1b, :v2 => v2b, :v3 => v3b })

    ms = Statsample::Multiset.new([:v1, :v2, :v3])
    ms.add_dataset(:ds1, ds1)
    ms.add_dataset(:ds2, ds2)
    assert_equal(ds1, ms[:ds1])
    assert_equal(ds2, ms[:ds2])
    assert_equal(v1a, ms[:ds1][:v1])
    assert_not_equal(v1b, ms[:ds1][:v1])

    # ds3 lacks :v3. Pass a name as well, like every other add_dataset call:
    # with a single argument the ArgumentError would come from the argument
    # count rather than from the field mismatch under test.
    # NOTE(review): assumes add_dataset raises ArgumentError on mismatched
    # fields -- confirm against Multiset#add_dataset.
    ds3 = Daru::DataFrame.new({ :v1 => v1b, :v2 => v2b })
    assert_raise ArgumentError do
      ms.add_dataset(:ds3, ds3)
    end
  end

  # new_empty_vectors should match a multiset assembled by hand from empty
  # data frames with the same fields.
  def test_creation_empty
    ms = Statsample::Multiset.new_empty_vectors([:id, :age, :name], [:male, :female])
    ds_male = Daru::DataFrame.new({
      :id => Daru::Vector.new([]),
      :age => Daru::Vector.new([]),
      :name => Daru::Vector.new([])
    }, order: [:id, :age, :name])

    ds_female = Daru::DataFrame.new({
      :id => Daru::Vector.new([]),
      :age => Daru::Vector.new([]),
      :name => Daru::Vector.new([])
    }, order: [:id, :age, :name])

    ms2 = Statsample::Multiset.new([:id, :age, :name])
    ms2.add_dataset(:male, ds_male)
    ms2.add_dataset(:female, ds_female)
    assert_equal(ms2.fields, ms.fields)
    assert_equal(ms2[:male], ms[:male])
    assert_equal(ms2[:female], ms[:female])
  end

  # Splitting on one vector yields one dataset per distinct value.
  def test_to_multiset_by_split_one
    sex = Daru::Vector.new(%w(m m m m m f f f f m))
    city = Daru::Vector.new(%w(London Paris NY London Paris NY London Paris NY Tome))
    age = Daru::Vector.new([10, 10, 20, 30, 34, 34, 33, 35, 36, 40])
    ds = Daru::DataFrame.new({ :sex => sex, :city => city, :age => age })
    ms = ds.to_multiset_by_split(:sex)
    assert_equal(2, ms.n_datasets)
    assert_equal(%w(f m), ms.datasets.keys.sort)
    assert_equal(6, ms['m'].nrows)
    assert_equal(4, ms['f'].nrows)
    assert_equal(%w(London Paris NY London Paris Tome), ms['m'][:city].to_a)
    assert_equal([34, 33, 35, 36], ms['f'][:age].to_a)
  end

  # Splitting on several vectors keys each dataset by the array of values.
  def test_to_multiset_by_split_multiple
    sex = Daru::Vector.new(%w(m m m m m m m m m m f f f f f f f f f f))
    city = Daru::Vector.new(%w(London London London Paris Paris London London London Paris Paris London London London Paris Paris London London London Paris Paris))
    hair = Daru::Vector.new(%w(blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black))
    age = Daru::Vector.new([10, 10, 20, 30, 34, 34, 33, 35, 36, 40, 10, 10, 20, 30, 34, 34, 33, 35, 36, 40])
    ds = Daru::DataFrame.new({
      :sex => sex, :city => city, :hair => hair, :age => age
    }, order: [:sex, :city, :hair, :age])
    ms = ds.to_multiset_by_split(:sex, :city, :hair)
    assert_equal(8, ms.n_datasets)
    assert_equal(3, ms[%w(m London blonde)].nrows)
    # Rows 3, 7 and 8 of the fixture are male / London / black. This replaces
    # a verbatim repeat of the assertion above.
    assert_equal(3, ms[%w(m London black)].nrows)
    assert_equal(1, ms[%w(m Paris black)].nrows)
  end

  # Stratified proportion with stratum sizes 50 and 100.
  def test_stratum_proportion
    ds1 = Daru::DataFrame.new({ :q1 => Daru::Vector.new([1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0]) })
    ds2 = Daru::DataFrame.new({ :q1 => Daru::Vector.new([1, 1, 1, 1, 1, 1, 1, 0, 0]) })
    assert_equal(5.0 / 12, ds1[:q1].proportion)
    assert_equal(7.0 / 9, ds2[:q1].proportion)
    ms = Statsample::Multiset.new([:q1])
    ms.add_dataset(:d1, ds1)
    ms.add_dataset(:d2, ds2)
    ss = Statsample::StratifiedSample.new(ms, :d1 => 50, :d2 => 100)
    assert_in_delta(0.655, ss.proportion(:q1), 0.01)
    assert_in_delta(0.345, ss.proportion(:q1, 0), 0.01)
  end

  # Sizes, mean and standard error for two equally sized strata.
  def test_stratum_scale
    boys = Daru::DataFrame.new({ :test => Daru::Vector.new([50, 55, 60, 62, 62, 65, 67, 67, 70, 70, 73, 73, 75, 78, 78, 80, 85, 90]) })
    girls = Daru::DataFrame.new({ :test => Daru::Vector.new([70, 70, 72, 72, 75, 75, 78, 78, 80, 80, 82, 82, 85, 85, 88, 88, 90, 90]) })
    ms = Statsample::Multiset.new([:test])
    ms.add_dataset(:boys, boys)
    ms.add_dataset(:girls, girls)
    ss = Statsample::StratifiedSample.new(ms, :boys => 10_000, :girls => 10_000)
    assert_equal(2, ss.strata_number)
    assert_equal(20_000, ss.population_size)
    assert_equal(10_000, ss.stratum_size(:boys))
    assert_equal(10_000, ss.stratum_size(:girls))
    assert_equal(36, ss.sample_size)
    assert_equal(75, ss.mean(:test))
    assert_in_delta(1.45, ss.standard_error_wor(:test), 0.01)
    assert_in_delta(ss.standard_error_wor(:test), ss.standard_error_wor_2(:test), 0.00001)
  end

  # each yields (key, dataset) pairs for the split built in setup.
  def test_each
    xpe = {
      'a' => Daru::Vector.new(%w(a a a a)),
      'b' => Daru::Vector.new(%w(b b b b))
    }
    ype = {
      'a' => Daru::Vector.new([1, 2, 3, 4]),
      'b' => Daru::Vector.new([5, 6, 7, 8])
    }
    zpe = {
      'a' => Daru::Vector.new([10, 11, 12, 13]),
      'b' => Daru::Vector.new([14, 15, 16, 17])
    }
    xp, yp, zp = {}, {}, {}
    @ms.each do |k, ds|
      xp[k] = ds[:x]
      yp[k] = ds[:y]
      zp[k] = ds[:z]
    end
    assert_equal(xpe, xp)
    assert_equal(ype, yp)
    assert_equal(zpe, zp)
  end

  # union with a block: each dataset is recoded inside the block and the
  # recoded values are expected in the combined data frame.
  def test_multiset_union_with_block
    r1 = rand
    r2 = rand
    ye = Daru::Vector.new([1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2])
    ze = Daru::Vector.new([10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2])

    ds2 = @ms.union { |k, ds|
      ds[:y].recode! { |v| k == 'a' ? v * r1 : v * r2 }
      ds[:z].recode! { |v| k == 'a' ? v * r1 : v * r2 }
    }
    assert_equal(ye, ds2[:y])
    assert_equal(ze, ds2[:z])
  end

  # union without a block: datasets are recoded first via each, then combined.
  def test_multiset_union
    r1 = rand
    r2 = rand
    ye = Daru::Vector.new([1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2])
    ze = Daru::Vector.new([10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2])

    @ms.each do |k, ds|
      ds[:y].recode! { |v| k == 'a' ? v * r1 : v * r2 }
      ds[:z].recode! { |v| k == 'a' ? v * r1 : v * r2 }
    end
    ds2 = @ms.union
    assert_equal(ye, ds2[:y])
    assert_equal(ze, ds2[:z])
  end
end
|