statsample-ekatena 2.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (156)
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.travis.yml +23 -0
  4. data/CONTRIBUTING.md +17 -0
  5. data/Gemfile +2 -0
  6. data/History.txt +457 -0
  7. data/LICENSE.txt +12 -0
  8. data/README.md +175 -0
  9. data/Rakefile +44 -0
  10. data/benchmarks/correlation_matrix_15_variables.rb +32 -0
  11. data/benchmarks/correlation_matrix_5_variables.rb +33 -0
  12. data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
  13. data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
  14. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +71 -0
  15. data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
  16. data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
  17. data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
  18. data/benchmarks/correlation_matrix_methods/results.ds +0 -0
  19. data/benchmarks/factor_map.rb +37 -0
  20. data/benchmarks/helpers_benchmark.rb +5 -0
  21. data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
  22. data/doc_latex/manual/equations.tex +78 -0
  23. data/examples/boxplot.rb +28 -0
  24. data/examples/chisquare_test.rb +23 -0
  25. data/examples/correlation_matrix.rb +32 -0
  26. data/examples/dataset.rb +30 -0
  27. data/examples/dominance_analysis.rb +33 -0
  28. data/examples/dominance_analysis_bootstrap.rb +32 -0
  29. data/examples/histogram.rb +26 -0
  30. data/examples/icc.rb +24 -0
  31. data/examples/levene.rb +29 -0
  32. data/examples/multiple_regression.rb +20 -0
  33. data/examples/multivariate_correlation.rb +33 -0
  34. data/examples/parallel_analysis.rb +40 -0
  35. data/examples/polychoric.rb +40 -0
  36. data/examples/principal_axis.rb +26 -0
  37. data/examples/reliability.rb +31 -0
  38. data/examples/scatterplot.rb +25 -0
  39. data/examples/t_test.rb +27 -0
  40. data/examples/tetrachoric.rb +17 -0
  41. data/examples/u_test.rb +24 -0
  42. data/examples/vector.rb +20 -0
  43. data/examples/velicer_map_test.rb +46 -0
  44. data/grab_references.rb +29 -0
  45. data/lib/spss.rb +134 -0
  46. data/lib/statsample-ekatena/analysis.rb +100 -0
  47. data/lib/statsample-ekatena/analysis/suite.rb +89 -0
  48. data/lib/statsample-ekatena/analysis/suitereportbuilder.rb +44 -0
  49. data/lib/statsample-ekatena/anova.rb +24 -0
  50. data/lib/statsample-ekatena/anova/contrast.rb +79 -0
  51. data/lib/statsample-ekatena/anova/oneway.rb +187 -0
  52. data/lib/statsample-ekatena/anova/twoway.rb +207 -0
  53. data/lib/statsample-ekatena/bivariate.rb +406 -0
  54. data/lib/statsample-ekatena/bivariate/pearson.rb +54 -0
  55. data/lib/statsample-ekatena/codification.rb +182 -0
  56. data/lib/statsample-ekatena/converter/csv.rb +28 -0
  57. data/lib/statsample-ekatena/converter/spss.rb +48 -0
  58. data/lib/statsample-ekatena/converters.rb +211 -0
  59. data/lib/statsample-ekatena/crosstab.rb +188 -0
  60. data/lib/statsample-ekatena/daru.rb +115 -0
  61. data/lib/statsample-ekatena/dataset.rb +10 -0
  62. data/lib/statsample-ekatena/dominanceanalysis.rb +425 -0
  63. data/lib/statsample-ekatena/dominanceanalysis/bootstrap.rb +232 -0
  64. data/lib/statsample-ekatena/factor.rb +104 -0
  65. data/lib/statsample-ekatena/factor/map.rb +124 -0
  66. data/lib/statsample-ekatena/factor/parallelanalysis.rb +166 -0
  67. data/lib/statsample-ekatena/factor/pca.rb +242 -0
  68. data/lib/statsample-ekatena/factor/principalaxis.rb +243 -0
  69. data/lib/statsample-ekatena/factor/rotation.rb +198 -0
  70. data/lib/statsample-ekatena/formula/fit_model.rb +46 -0
  71. data/lib/statsample-ekatena/formula/formula.rb +306 -0
  72. data/lib/statsample-ekatena/graph.rb +11 -0
  73. data/lib/statsample-ekatena/graph/boxplot.rb +236 -0
  74. data/lib/statsample-ekatena/graph/histogram.rb +198 -0
  75. data/lib/statsample-ekatena/graph/scatterplot.rb +213 -0
  76. data/lib/statsample-ekatena/histogram.rb +180 -0
  77. data/lib/statsample-ekatena/matrix.rb +329 -0
  78. data/lib/statsample-ekatena/multiset.rb +310 -0
  79. data/lib/statsample-ekatena/regression.rb +65 -0
  80. data/lib/statsample-ekatena/regression/multiple.rb +89 -0
  81. data/lib/statsample-ekatena/regression/multiple/alglibengine.rb +128 -0
  82. data/lib/statsample-ekatena/regression/multiple/baseengine.rb +251 -0
  83. data/lib/statsample-ekatena/regression/multiple/gslengine.rb +129 -0
  84. data/lib/statsample-ekatena/regression/multiple/matrixengine.rb +205 -0
  85. data/lib/statsample-ekatena/regression/multiple/rubyengine.rb +86 -0
  86. data/lib/statsample-ekatena/regression/simple.rb +121 -0
  87. data/lib/statsample-ekatena/reliability.rb +150 -0
  88. data/lib/statsample-ekatena/reliability/icc.rb +415 -0
  89. data/lib/statsample-ekatena/reliability/multiscaleanalysis.rb +181 -0
  90. data/lib/statsample-ekatena/reliability/scaleanalysis.rb +233 -0
  91. data/lib/statsample-ekatena/reliability/skillscaleanalysis.rb +114 -0
  92. data/lib/statsample-ekatena/resample.rb +15 -0
  93. data/lib/statsample-ekatena/shorthand.rb +125 -0
  94. data/lib/statsample-ekatena/srs.rb +169 -0
  95. data/lib/statsample-ekatena/test.rb +82 -0
  96. data/lib/statsample-ekatena/test/bartlettsphericity.rb +45 -0
  97. data/lib/statsample-ekatena/test/chisquare.rb +73 -0
  98. data/lib/statsample-ekatena/test/f.rb +52 -0
  99. data/lib/statsample-ekatena/test/kolmogorovsmirnov.rb +63 -0
  100. data/lib/statsample-ekatena/test/levene.rb +88 -0
  101. data/lib/statsample-ekatena/test/t.rb +309 -0
  102. data/lib/statsample-ekatena/test/umannwhitney.rb +208 -0
  103. data/lib/statsample-ekatena/test/wilcoxonsignedrank.rb +90 -0
  104. data/lib/statsample-ekatena/vector.rb +19 -0
  105. data/lib/statsample-ekatena/version.rb +3 -0
  106. data/lib/statsample.rb +282 -0
  107. data/po/es/statsample.mo +0 -0
  108. data/po/es/statsample.po +959 -0
  109. data/po/statsample.pot +947 -0
  110. data/references.txt +24 -0
  111. data/statsample-ekatena.gemspec +49 -0
  112. data/test/fixtures/bank2.dat +200 -0
  113. data/test/fixtures/correlation_matrix.rb +17 -0
  114. data/test/fixtures/df.csv +15 -0
  115. data/test/fixtures/hartman_23.matrix +9 -0
  116. data/test/fixtures/stock_data.csv +500 -0
  117. data/test/fixtures/tetmat_matrix.txt +5 -0
  118. data/test/fixtures/tetmat_test.txt +1001 -0
  119. data/test/helpers_tests.rb +83 -0
  120. data/test/test_analysis.rb +176 -0
  121. data/test/test_anova_contrast.rb +36 -0
  122. data/test/test_anovaoneway.rb +26 -0
  123. data/test/test_anovatwoway.rb +37 -0
  124. data/test/test_anovatwowaywithdataset.rb +47 -0
  125. data/test/test_anovawithvectors.rb +102 -0
  126. data/test/test_awesome_print_bug.rb +16 -0
  127. data/test/test_bartlettsphericity.rb +25 -0
  128. data/test/test_bivariate.rb +164 -0
  129. data/test/test_codification.rb +78 -0
  130. data/test/test_crosstab.rb +67 -0
  131. data/test/test_dominance_analysis.rb +39 -0
  132. data/test/test_factor.rb +228 -0
  133. data/test/test_factor_map.rb +38 -0
  134. data/test/test_factor_pa.rb +56 -0
  135. data/test/test_fit_model.rb +88 -0
  136. data/test/test_ggobi.rb +35 -0
  137. data/test/test_gsl.rb +15 -0
  138. data/test/test_histogram.rb +109 -0
  139. data/test/test_matrix.rb +48 -0
  140. data/test/test_multiset.rb +176 -0
  141. data/test/test_regression.rb +231 -0
  142. data/test/test_reliability.rb +223 -0
  143. data/test/test_reliability_icc.rb +198 -0
  144. data/test/test_reliability_skillscale.rb +57 -0
  145. data/test/test_resample.rb +24 -0
  146. data/test/test_srs.rb +9 -0
  147. data/test/test_statistics.rb +69 -0
  148. data/test/test_stest.rb +69 -0
  149. data/test/test_stratified.rb +17 -0
  150. data/test/test_test_f.rb +33 -0
  151. data/test/test_test_kolmogorovsmirnov.rb +34 -0
  152. data/test/test_test_t.rb +62 -0
  153. data/test/test_umannwhitney.rb +27 -0
  154. data/test/test_vector.rb +12 -0
  155. data/test/test_wilcoxonsignedrank.rb +64 -0
  156. metadata +570 -0
@@ -0,0 +1,38 @@
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
+ # require 'rserve'
3
+ # require 'statsample/rserve_extension'
4
+
5
# Exercises Statsample::Factor::MAP against a fixed 8x8 matrix, once with
# the GSL flag disabled and once with it enabled.
class StatsampleFactorMpaTestCase < Minitest::Test
  context Statsample::Factor::MAP do
    setup do
      # Symmetric 8x8 input with a unit diagonal, shared by both variants.
      m = Matrix[
        [1, 0.846, 0.805, 0.859, 0.473, 0.398, 0.301, 0.382],
        [0.846, 1, 0.881, 0.826, 0.376, 0.326, 0.277, 0.415],
        [0.805, 0.881, 1, 0.801, 0.38, 0.319, 0.237, 0.345],
        [0.859, 0.826, 0.801, 1, 0.436, 0.329, 0.327, 0.365],
        [0.473, 0.376, 0.38, 0.436, 1, 0.762, 0.73, 0.629],
        [0.398, 0.326, 0.319, 0.329, 0.762, 1, 0.583, 0.577],
        [0.301, 0.277, 0.237, 0.327, 0.73, 0.583, 1, 0.539],
        [0.382, 0.415, 0.345, 0.365, 0.629, 0.577, 0.539, 1]
      ]
      @map = Statsample::Factor::MAP.new(m)
    end

    should 'return correct values with pure ruby' do
      @map.use_gsl = false
      map_assertions(@map)
    end

    should_with_gsl 'return correct values with gsl' do
      @map.use_gsl = true
      map_assertions(@map)
    end
  end

  # Shared reference checks for both variants.
  #
  # Minitest assertions take the expected value first; keeping that order
  # makes the "Expected ... Actual ..." failure output read correctly.
  def map_assertions(map)
    assert_in_delta(0.066445, map.minfm, 0.00001)
    assert_equal(2, map.number_of_factors)
    assert_in_delta(0.312475, map.fm[0], 0.00001)
    assert_in_delta(0.245121, map.fm[1], 0.00001)
  end
end
@@ -0,0 +1,56 @@
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
+ # require 'rserve'
3
+ # require 'statsample/rserve_extension'
4
+
5
# Parallel-analysis tests for Statsample::Factor::ParallelAnalysis.
class StatsampleFactorTestCase < Minitest::Test
  include Statsample::Fixtures
  # Based on Hardle and Simar
  def setup
    @fixtures_dir = File.expand_path('fixtures', File.dirname(__FILE__))
  end

  # Compares eigenvalue means obtained by bootstrapping a generated dataset
  # with those obtained from purely random data. Skipped without GSL.
  def test_parallelanalysis_with_data
    skip('Too slow without GSL') unless Statsample.has_gsl?

    samples = 100
    variables = 10
    iterations = 50
    rng = Distribution::Normal.rng

    # Two latent factors; the first five variables load mainly on f1,
    # the remaining ones mainly on f2.
    f1 = Daru::Vector.new(Array.new(samples) { rng.call })
    f2 = Daru::Vector.new(Array.new(samples) { rng.call })

    vectors = {}
    variables.times do |i|
      major, minor = i < 5 ? [f1, f2] : [f2, f1]
      values = Array.new(samples) do |nv|
        major[nv] * 5 + minor[nv] * 2 + rng.call
      end
      vectors[:"v#{i}"] = Daru::Vector.new(values)
    end
    ds = Daru::DataFrame.new(vectors)

    pa1 = Statsample::Factor::ParallelAnalysis.new(ds, bootstrap_method: :data, iterations: iterations)
    pa2 = Statsample::Factor::ParallelAnalysis.with_random_data(samples, variables, iterations: iterations, percentil: 95)

    (1..3).each do |n|
      var = :"ev_0000#{n}"
      assert_in_delta(pa1.ds_eigenvalues[var].mean, pa2.ds_eigenvalues[var].mean, 0.07)
    end
  end

  # Checks the first three mean eigenvalues from random data against
  # reference values and that a non-empty summary is produced.
  def test_parallelanalysis
    pa = Statsample::Factor::ParallelAnalysis.with_random_data(305, 8, iterations: 100, percentil: 95)
    eigenvalues = pa.ds_eigenvalues

    assert_in_delta(1.2454, eigenvalues[:ev_00001].mean, 0.05)
    assert_in_delta(1.1542, eigenvalues[:ev_00002].mean, 0.01)
    assert_in_delta(1.0836, eigenvalues[:ev_00003].mean, 0.01)
    assert(pa.summary.size > 0)
  end
end
@@ -0,0 +1,88 @@
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
+ require 'minitest/autorun'
3
+
4
# Specs for Statsample::FitModel: each example passes a formula and the
# vector names expected for it to the assert_vectors_from_formula helper.
describe Statsample::FitModel do
  before do
    # Resolve the fixture relative to this file so the spec does not depend
    # on the directory the test runner was started from.
    fixture = File.expand_path('fixtures/df.csv', File.dirname(__FILE__))
    @df = Daru::DataFrame.from_csv fixture
    @df.to_category 'c', 'd', 'e'
  end

  context '#df_for_regression' do
    context 'no interaction' do
      it { assert_vectors_from_formula 'y~a+e', %w[a e_B e_C y] }
    end

    context '2-way interaction' do
      context 'interaction of numerical with numerical' do
        context 'none reoccur' do
          it { assert_vectors_from_formula 'y~a:b', %w[a:b y] }
        end

        context 'one reoccur' do
          it { assert_vectors_from_formula 'y~a+a:b', %w[a a:b y] }
        end

        context 'both reoccur' do
          it { assert_vectors_from_formula 'y~a+b+a:b', %w[a a:b b y] }
        end
      end

      context 'interaction of category with numerical' do
        context 'none reoccur' do
          it { assert_vectors_from_formula 'y~a:e', %w[e_A:a e_B:a e_C:a y] }
        end

        context 'one reoccur' do
          context 'numeric occur' do
            it { assert_vectors_from_formula 'y~a+a:e', %w[a e_B:a e_C:a y] }
          end

          context 'category occur' do
            it do
              assert_vectors_from_formula 'y~e+a:e',
                                          %w[e_B e_C e_A:a e_B:a e_C:a y]
            end
          end
        end

        context 'both reoccur' do
          it do
            assert_vectors_from_formula 'y~a+e+a:e',
                                        %w[a e_B e_C e_B:a e_C:a y]
          end
        end
      end

      context 'interaction of category with category' do
        context 'none reoccur' do
          it do
            assert_vectors_from_formula 'y~c:e',
                                        %w[e_B e_C c_yes:e_A c_yes:e_B c_yes:e_C y]
          end
        end

        context 'one reoccur' do
          it do
            assert_vectors_from_formula 'y~e+c:e',
                                        %w[e_B e_C c_yes:e_A c_yes:e_B c_yes:e_C y]
          end
        end

        context 'both reoccur' do
          it do
            assert_vectors_from_formula 'y~c+e+c:e',
                                        %w[c_yes e_B e_C c_yes:e_B c_yes:e_C y]
          end
        end
      end
    end

    context 'corner case' do
      context 'example 1' do
        it do
          assert_vectors_from_formula 'y~d:a+d:e',
                                      %w[e_B e_C d_male:e_A d_male:e_B d_male:e_C d_female:a d_male:a y]
        end
      end
    end

    context 'complex examples' do
      context 'random example 1' do
        it do
          assert_vectors_from_formula 'y~a+e+c:d+e:d',
                                      %w[e_B e_C d_male c_yes:d_female c_yes:d_male e_B:d_male e_C:d_male a y]
        end
      end

      context 'random example 2' do
        it do
          assert_vectors_from_formula 'y~e+b+c+d:e+b:e+a:e+0',
                                      %w[e_A e_B e_C c_yes d_male:e_A d_male:e_B d_male:e_C b e_B:b e_C:b e_A:a e_B:a e_C:a y]
        end
      end
    end
  end
end
@@ -0,0 +1,35 @@
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
+ require 'ostruct'
3
# Tests for the Statsample::GGobi helpers values_definition and
# variable_definition.
class StatsampleGGobiTestCase < Minitest::Test
  def setup
    v1 = Daru::Vector.new([10.2, 20.3, 10, 20, 30, 40, 30, 20, 30, 40] * 10)
    @v2 = Daru::Vector.new(%w(a b c a a a b b c d) * 10)
    # Only 'a' and 'd' get labels; 'b' and 'c' are expected to appear as-is.
    @v2.labels = { 'a' => 'letter a', 'd' => 'letter d' }
    v3 = Daru::Vector.new([1, 2, 3, 4, 5, 4, 3, 2, 1, 2] * 10)
    @ds = Daru::DataFrame.new({ :v1 => v1, :v2 => @v2, :v3 => v3 })
  end

  def test_values_definition
    a = [1.0, 2, 'a', nil]
    assert_equal('1.0 2 a NA', Statsample::GGobi.values_definition(a, 'NA'))
  end

  def test_variable_definition
    carrier = OpenStruct.new
    carrier.categorials = []
    carrier.conversions = {}
    real_var_definition = Statsample::GGobi.variable_definition(carrier, @v2, 'variable 2', 'v2')
    expected = <<-EOS
      <categoricalvariable name="variable 2" nickname="v2">
      <levels count="4">
      <level value="1">letter a</level>
      <level value="2">b</level>
      <level value="3">c</level>
      <level value="4">letter d</level></levels>
      </categoricalvariable>
    EOS
    # Compare the markup, not its layout: collapse every whitespace run to a
    # single space and trim, so indentation or line-break differences between
    # the heredoc and the generated XML cannot cause a spurious failure.
    assert_equal(normalize_whitespace(expected), normalize_whitespace(real_var_definition))
    assert_equal({ 'variable 2' => { 'a' => 1, 'b' => 2, 'c' => 3, 'd' => 4 } }, carrier.conversions)
    assert_equal(['variable 2'], carrier.categorials)
  end

  private

  # Returns +text+ with each run of whitespace replaced by one space and
  # leading/trailing whitespace removed.
  def normalize_whitespace(text)
    text.gsub(/\s+/, ' ').strip
  end
end
@@ -0,0 +1,15 @@
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
# Round-trips a 5x3 data frame through to_matrix / to_gsl / to_matrix and
# checks the dimensions at each step. Declared with should_with_gsl.
class StatsampleGSLTestCase < Minitest::Test
  should_with_gsl 'matrix with gsl' do
    columns = {
      :a => Daru::Vector.new([1, 2, 3, 4, 20]),
      :b => Daru::Vector.new([3, 2, 3, 4, 50]),
      :c => Daru::Vector.new([6, 2, 3, 4, 3])
    }
    frame = Daru::DataFrame.new(columns)

    gsl_matrix = frame.to_matrix.to_gsl
    assert_equal(5, gsl_matrix.size1)
    assert_equal(3, gsl_matrix.size2)

    ruby_matrix = gsl_matrix.to_matrix
    assert_equal(5, ruby_matrix.row_size)
    assert_equal(3, ruby_matrix.column_size)
  end
end
@@ -0,0 +1,109 @@
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
+
3
# Tests for Statsample::Histogram allocation, incrementing and summary
# accessors, plus one rendering check for Statsample::Graph::Histogram.
class StatsampleHistogramTestCase < Minitest::Test
  context Statsample::Histogram do
    should 'alloc correctly with integer' do
      hist = Statsample::Histogram.alloc(4)
      assert_equal(Array.new(4, 0.0), hist.bin)
      assert_equal(Array.new(5, 0.0), hist.range)
    end

    should 'alloc correctly with array' do
      edges = [1, 3, 7, 9, 20]
      hist = Statsample::Histogram.alloc(edges)
      assert_equal(Array.new(4, 0.0), hist.bin)
      assert_equal([1, 3, 7, 9, 20], hist.range)
    end

    should 'alloc correctly with integer and min, max array' do
      hist = Statsample::Histogram.alloc(5, [0, 5])
      assert_equal([0.0, 1.0, 2.0, 3.0, 4.0, 5.0], hist.range)
      assert_equal(Array.new(5, 0.0), hist.bin)
    end

    should 'bin() method return correct number of bins' do
      hist = Statsample::Histogram.alloc(4)
      assert_equal(4, hist.bins)
    end

    should 'increment correctly' do
      hist = Statsample::Histogram.alloc(5, [0, 5])

      hist.increment 2.5
      assert_equal([0.0, 0.0, 1.0, 0.0, 0.0], hist.bin)

      hist.increment [0.5, 0.5, 3.5, 3.5]
      assert_equal([2.0, 0.0, 1.0, 2.0, 0.0], hist.bin)

      # The lower bound lands in the first bin ...
      hist.increment 0
      assert_equal([3.0, 0.0, 1.0, 2.0, 0.0], hist.bin)

      # ... while the upper bound leaves every bin unchanged.
      hist.increment 5
      assert_equal([3.0, 0.0, 1.0, 2.0, 0.0], hist.bin)
    end

    should 'alloc_uniform correctly with n, min,max' do
      hist = Statsample::Histogram.alloc_uniform(5, 0, 10)
      assert_equal(5, hist.bins)
      assert_equal(Array.new(5, 0.0), hist.bin)
      assert_equal([0.0, 2.0, 4.0, 6.0, 8.0, 10.0], hist.range)
    end

    should 'alloc_uniform correctly with n, [min,max]' do
      hist = Statsample::Histogram.alloc_uniform(5, [0, 10])
      assert_equal(5, hist.bins)
      assert_equal(Array.new(5, 0.0), hist.bin)
      assert_equal([0.0, 2.0, 4.0, 6.0, 8.0, 10.0], hist.range)
    end

    should 'get_range()' do
      hist = Statsample::Histogram.alloc_uniform(5, 2, 12)
      (0...5).each do |i|
        lower = 2 + i * 2
        assert_equal([lower, lower + 2], hist.get_range(i))
      end
    end

    should 'min() and max()' do
      hist = Statsample::Histogram.alloc_uniform(5, 2, 12)
      assert_equal(2, hist.min)
      assert_equal(12, hist.max)
    end

    should 'max_val()' do
      hist = Statsample::Histogram.alloc(5, [0, 5])
      100.times { hist.increment(rand * 5) }
      largest = (0..4).map { |i| hist.bin[i] }.max
      assert_equal(largest, hist.max_val)
    end

    should 'min_val()' do
      hist = Statsample::Histogram.alloc(5, [0, 5])
      100.times { hist.increment(rand * 5) }
      smallest = (0..4).map { |i| hist.bin[i] }.min
      assert_equal(smallest, hist.min_val)
    end

    should 'return correct estimated mean' do
      data = Daru::Vector.new([1.5, 1.5, 1.5, 3.5, 3.5, 3.5])
      hist = Statsample::Histogram.alloc(5, [0, 5])
      hist.increment(data)
      assert_equal(2.5, hist.estimated_mean)
    end

    should 'return correct estimated standard deviation' do
      data = Daru::Vector.new([0.5, 1.5, 1.5, 1.5, 2.5, 3.5, 3.5, 3.5, 4.5])
      hist = Statsample::Histogram.alloc(5, [0, 5])
      hist.increment(data)
      assert_equal(data.sd, hist.estimated_standard_deviation)
    end

    should 'return correct sum for all values' do
      hist = Statsample::Histogram.alloc(5, [0, 5])
      count = rand(100)
      count.times { hist.increment(1) }
      assert_equal(count, hist.sum)
    end

    should 'return correct sum for a subset of values' do
      hist = Statsample::Histogram.alloc(5, [0, 5])
      hist.increment([0.5, 2.5, 4.5])
      assert_equal(1, hist.sum(0, 1))
      assert_equal(2, hist.sum(1, 4))
    end

    should 'not raise exception when all values equal' do
      assert_nothing_raised do
        constant = Daru::Vector.new([5, 5, 5, 5, 5, 5])
        graph = Statsample::Graph::Histogram.new(constant)
        graph.to_svg
      end
    end
  end
end
@@ -0,0 +1,48 @@
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
+
3
# Tests for the Statsample::NamedMatrix and Statsample::CovariateMatrix
# extensions applied to Matrix objects.
class StatsampleMatrixTestCase < Minitest::Test
  # A named matrix converted with to_dataframe must yield the same columns
  # as a data frame built directly from the column vectors.
  def test_to_dataset
    m = Matrix[[1, 4], [2, 5], [3, 6]]
    m.extend Statsample::NamedMatrix
    m.fields_y = [:x1, :x2]
    m.name = 'test'

    x1 = Daru::Vector.new([1, 2, 3])
    x2 = Daru::Vector.new([4, 5, 6])
    ds = Daru::DataFrame.new({ :x1 => x1, :x2 => x2 })
    ds.rename 'test'

    obs = m.to_dataframe
    assert_equal(ds[:x1], obs[:x1])
    assert_equal(ds[:x2], obs[:x2])
    assert_equal(ds[:x1].mean, obs[:x1].mean)
  end

  # Checks _type detection, submatrix extraction by field name, and that a
  # covariance matrix converted with #correlation matches the directly
  # computed correlation matrix.
  def test_covariate
    correlation = Matrix[[1.0, 0.3, 0.2], [0.3, 1.0, 0.5], [0.2, 0.5, 1.0]]
    correlation.extend Statsample::CovariateMatrix
    correlation.fields = %w(a b c)
    assert_equal(:correlation, correlation._type)

    assert_equal(Matrix[[0.5], [0.3]], correlation.submatrix(%w(c a), %w(b)))
    assert_equal(Matrix[[1.0, 0.2], [0.2, 1.0]], correlation.submatrix(%w(c a)))
    assert_equal(:correlation, correlation.submatrix(%w(c a))._type)

    covariance = Matrix[[20, 30, 10], [30, 60, 50], [10, 50, 50]]
    covariance.extend Statsample::CovariateMatrix
    assert_equal(:covariance, covariance._type)

    a = Daru::Vector.new(50.times.collect { rand })
    b = Daru::Vector.new(50.times.collect { rand })
    c = Daru::Vector.new(50.times.collect { rand })
    ds = Daru::DataFrame.new({ :a => a, :b => b, :c => c })
    corr = Statsample::Bivariate.correlation_matrix(ds)
    real = Statsample::Bivariate.covariance_matrix(ds).correlation
    corr.row_size.times do |i|
      corr.column_size.times do |j|
        assert_in_delta(corr[i, j], real[i, j], 1e-15)
      end
    end
  end
end
@@ -0,0 +1,176 @@
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
+
3
# Tests for Statsample::Multiset, DataFrame#to_multiset_by_split and
# Statsample::StratifiedSample.
class StatsampleMultisetTestCase < Minitest::Test
  def setup
    @x = Daru::Vector.new(%w(a a a a b b b b))
    @y = Daru::Vector.new([1, 2, 3, 4, 5, 6, 7, 8])
    @z = Daru::Vector.new([10, 11, 12, 13, 14, 15, 16, 17])
    @ds = Daru::DataFrame.new({ :x => @x, :y => @y, :z => @z })
    @ms = @ds.to_multiset_by_split(:x)
  end

  def test_creation
    v1a = Daru::Vector.new([1, 2, 3, 4, 5])
    v2a = Daru::Vector.new([11, 21, 31, 41, 51])
    v3a = Daru::Vector.new([21, 23, 34, 45, 56])
    ds1 = Daru::DataFrame.new({ :v1 => v1a, :v2 => v2a, :v3 => v3a })
    v1b = Daru::Vector.new([15, 25, 35, 45, 55])
    v2b = Daru::Vector.new([11, 21, 31, 41, 51])
    v3b = Daru::Vector.new([21, 23, 34, 45, 56])
    ds2 = Daru::DataFrame.new({ :v1 => v1b, :v2 => v2b, :v3 => v3b })
    ms = Statsample::Multiset.new([:v1, :v2, :v3])
    ms.add_dataset(:ds1, ds1)
    ms.add_dataset(:ds2, ds2)
    assert_equal(ds1, ms[:ds1])
    assert_equal(ds2, ms[:ds2])
    assert_equal(v1a, ms[:ds1][:v1])
    assert_not_equal(v1b, ms[:ds1][:v1])
    # ds3 lacks :v3. Pass a key as the other add_dataset calls do, so the
    # ArgumentError comes from the field mismatch and not from calling the
    # method with too few arguments.
    ds3 = Daru::DataFrame.new({ :v1 => v1b, :v2 => v2b })
    assert_raise ArgumentError do
      ms.add_dataset(:ds3, ds3)
    end
  end

  def test_creation_empty
    ms = Statsample::Multiset.new_empty_vectors([:id, :age, :name], [:male, :female])
    ds_male = Daru::DataFrame.new({
      :id => Daru::Vector.new([]),
      :age => Daru::Vector.new([]),
      :name => Daru::Vector.new([])
    }, order: [:id, :age, :name])

    ds_female = Daru::DataFrame.new({
      :id => Daru::Vector.new([]),
      :age => Daru::Vector.new([]),
      :name => Daru::Vector.new([])
    }, order: [:id, :age, :name])

    ms2 = Statsample::Multiset.new([:id, :age, :name])
    ms2.add_dataset(:male, ds_male)
    ms2.add_dataset(:female, ds_female)
    assert_equal(ms2.fields, ms.fields)
    assert_equal(ms2[:male], ms[:male])
    assert_equal(ms2[:female], ms[:female])
  end

  def test_to_multiset_by_split_one
    sex = Daru::Vector.new(%w(m m m m m f f f f m))
    city = Daru::Vector.new(%w(London Paris NY London Paris NY London Paris NY Tome))
    age = Daru::Vector.new([10, 10, 20, 30, 34, 34, 33, 35, 36, 40])
    ds = Daru::DataFrame.new({ :sex => sex, :city => city, :age => age })
    ms = ds.to_multiset_by_split(:sex)
    assert_equal(2, ms.n_datasets)
    assert_equal(%w(f m), ms.datasets.keys.sort)
    assert_equal(6, ms['m'].nrows)
    assert_equal(4, ms['f'].nrows)
    assert_equal(%w(London Paris NY London Paris Tome), ms['m'][:city].to_a)
    assert_equal([34, 33, 35, 36], ms['f'][:age].to_a)
  end

  def test_to_multiset_by_split_multiple
    sex = Daru::Vector.new(%w(m m m m m m m m m m f f f f f f f f f f))
    city = Daru::Vector.new(%w(London London London Paris Paris London London London Paris Paris London London London Paris Paris London London London Paris Paris))
    hair = Daru::Vector.new(%w(blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black))
    age = Daru::Vector.new([10, 10, 20, 30, 34, 34, 33, 35, 36, 40, 10, 10, 20, 30, 34, 34, 33, 35, 36, 40])
    ds = Daru::DataFrame.new({
      :sex => sex, :city => city, :hair => hair, :age => age
    }, order: [:sex, :city, :hair, :age])
    ms = ds.to_multiset_by_split(:sex, :city, :hair)
    assert_equal(8, ms.n_datasets)
    # Row counts follow from the vectors above: the male/London rows are
    # three blonde and three black; the male/Paris rows include one black.
    assert_equal(3, ms[%w(m London blonde)].nrows)
    assert_equal(3, ms[%w(m London black)].nrows)
    assert_equal(1, ms[%w(m Paris black)].nrows)
  end

  def test_stratum_proportion
    ds1 = Daru::DataFrame.new({ :q1 => Daru::Vector.new([1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0]) })
    ds2 = Daru::DataFrame.new({ :q1 => Daru::Vector.new([1, 1, 1, 1, 1, 1, 1, 0, 0]) })
    assert_equal(5.0 / 12, ds1[:q1].proportion)
    assert_equal(7.0 / 9, ds2[:q1].proportion)
    ms = Statsample::Multiset.new([:q1])
    ms.add_dataset(:d1, ds1)
    ms.add_dataset(:d2, ds2)
    ss = Statsample::StratifiedSample.new(ms, :d1 => 50, :d2 => 100)
    assert_in_delta(0.655, ss.proportion(:q1), 0.01)
    assert_in_delta(0.345, ss.proportion(:q1, 0), 0.01)
  end

  def test_stratum_scale
    boys = Daru::DataFrame.new({ :test => Daru::Vector.new([50, 55, 60, 62, 62, 65, 67, 67, 70, 70, 73, 73, 75, 78, 78, 80, 85, 90]) })
    girls = Daru::DataFrame.new({ :test => Daru::Vector.new([70, 70, 72, 72, 75, 75, 78, 78, 80, 80, 82, 82, 85, 85, 88, 88, 90, 90]) })
    ms = Statsample::Multiset.new([:test])
    ms.add_dataset(:boys, boys)
    ms.add_dataset(:girls, girls)
    ss = Statsample::StratifiedSample.new(ms, :boys => 10_000, :girls => 10_000)
    assert_equal(2, ss.strata_number)
    assert_equal(20_000, ss.population_size)
    assert_equal(10_000, ss.stratum_size(:boys))
    assert_equal(10_000, ss.stratum_size(:girls))
    assert_equal(36, ss.sample_size)
    assert_equal(75, ss.mean(:test))
    assert_in_delta(1.45, ss.standard_error_wor(:test), 0.01)
    assert_in_delta(ss.standard_error_wor(:test), ss.standard_error_wor_2(:test), 0.00001)
  end

  def test_each
    xpe = {
      'a' => Daru::Vector.new(%w(a a a a)),
      'b' => Daru::Vector.new(%w(b b b b))
    }
    ype = {
      'a' => Daru::Vector.new([1, 2, 3, 4]),
      'b' => Daru::Vector.new([5, 6, 7, 8])
    }
    zpe = {
      'a' => Daru::Vector.new([10, 11, 12, 13]),
      'b' => Daru::Vector.new([14, 15, 16, 17])
    }
    xp, yp, zp = {}, {}, {}
    @ms.each do |k, ds|
      xp[k] = ds[:x]
      yp[k] = ds[:y]
      zp[k] = ds[:z]
    end
    assert_equal(xpe, xp)
    assert_equal(ype, yp)
    assert_equal(zpe, zp)
  end

  # union with a block: each sub-dataset is rescaled inside the block
  # before the datasets are joined.
  def test_multiset_union_with_block
    r1 = rand
    r2 = rand
    ye = Daru::Vector.new([1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2])
    ze = Daru::Vector.new([10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2])

    ds2 = @ms.union do |k, ds|
      ds[:y].recode! { |v| k == 'a' ? v * r1 : v * r2 }
      ds[:z].recode! { |v| k == 'a' ? v * r1 : v * r2 }
    end
    assert_equal(ye, ds2[:y])
    assert_equal(ze, ds2[:z])
  end

  # union without a block: sub-datasets are rescaled in place first, then
  # joined.
  def test_multiset_union
    r1 = rand
    r2 = rand
    ye = Daru::Vector.new([1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2])
    ze = Daru::Vector.new([10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2])

    @ms.each do |k, ds|
      ds[:y].recode! { |v| k == 'a' ? v * r1 : v * r2 }
      ds[:z].recode! { |v| k == 'a' ? v * r1 : v * r2 }
    end
    ds2 = @ms.union
    assert_equal(ye, ds2[:y])
    assert_equal(ze, ds2[:z])
  end
end