statsample-ekatena 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156)
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.travis.yml +23 -0
  4. data/CONTRIBUTING.md +17 -0
  5. data/Gemfile +2 -0
  6. data/History.txt +457 -0
  7. data/LICENSE.txt +12 -0
  8. data/README.md +175 -0
  9. data/Rakefile +44 -0
  10. data/benchmarks/correlation_matrix_15_variables.rb +32 -0
  11. data/benchmarks/correlation_matrix_5_variables.rb +33 -0
  12. data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
  13. data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
  14. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +71 -0
  15. data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
  16. data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
  17. data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
  18. data/benchmarks/correlation_matrix_methods/results.ds +0 -0
  19. data/benchmarks/factor_map.rb +37 -0
  20. data/benchmarks/helpers_benchmark.rb +5 -0
  21. data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
  22. data/doc_latex/manual/equations.tex +78 -0
  23. data/examples/boxplot.rb +28 -0
  24. data/examples/chisquare_test.rb +23 -0
  25. data/examples/correlation_matrix.rb +32 -0
  26. data/examples/dataset.rb +30 -0
  27. data/examples/dominance_analysis.rb +33 -0
  28. data/examples/dominance_analysis_bootstrap.rb +32 -0
  29. data/examples/histogram.rb +26 -0
  30. data/examples/icc.rb +24 -0
  31. data/examples/levene.rb +29 -0
  32. data/examples/multiple_regression.rb +20 -0
  33. data/examples/multivariate_correlation.rb +33 -0
  34. data/examples/parallel_analysis.rb +40 -0
  35. data/examples/polychoric.rb +40 -0
  36. data/examples/principal_axis.rb +26 -0
  37. data/examples/reliability.rb +31 -0
  38. data/examples/scatterplot.rb +25 -0
  39. data/examples/t_test.rb +27 -0
  40. data/examples/tetrachoric.rb +17 -0
  41. data/examples/u_test.rb +24 -0
  42. data/examples/vector.rb +20 -0
  43. data/examples/velicer_map_test.rb +46 -0
  44. data/grab_references.rb +29 -0
  45. data/lib/spss.rb +134 -0
  46. data/lib/statsample-ekatena/analysis.rb +100 -0
  47. data/lib/statsample-ekatena/analysis/suite.rb +89 -0
  48. data/lib/statsample-ekatena/analysis/suitereportbuilder.rb +44 -0
  49. data/lib/statsample-ekatena/anova.rb +24 -0
  50. data/lib/statsample-ekatena/anova/contrast.rb +79 -0
  51. data/lib/statsample-ekatena/anova/oneway.rb +187 -0
  52. data/lib/statsample-ekatena/anova/twoway.rb +207 -0
  53. data/lib/statsample-ekatena/bivariate.rb +406 -0
  54. data/lib/statsample-ekatena/bivariate/pearson.rb +54 -0
  55. data/lib/statsample-ekatena/codification.rb +182 -0
  56. data/lib/statsample-ekatena/converter/csv.rb +28 -0
  57. data/lib/statsample-ekatena/converter/spss.rb +48 -0
  58. data/lib/statsample-ekatena/converters.rb +211 -0
  59. data/lib/statsample-ekatena/crosstab.rb +188 -0
  60. data/lib/statsample-ekatena/daru.rb +115 -0
  61. data/lib/statsample-ekatena/dataset.rb +10 -0
  62. data/lib/statsample-ekatena/dominanceanalysis.rb +425 -0
  63. data/lib/statsample-ekatena/dominanceanalysis/bootstrap.rb +232 -0
  64. data/lib/statsample-ekatena/factor.rb +104 -0
  65. data/lib/statsample-ekatena/factor/map.rb +124 -0
  66. data/lib/statsample-ekatena/factor/parallelanalysis.rb +166 -0
  67. data/lib/statsample-ekatena/factor/pca.rb +242 -0
  68. data/lib/statsample-ekatena/factor/principalaxis.rb +243 -0
  69. data/lib/statsample-ekatena/factor/rotation.rb +198 -0
  70. data/lib/statsample-ekatena/formula/fit_model.rb +46 -0
  71. data/lib/statsample-ekatena/formula/formula.rb +306 -0
  72. data/lib/statsample-ekatena/graph.rb +11 -0
  73. data/lib/statsample-ekatena/graph/boxplot.rb +236 -0
  74. data/lib/statsample-ekatena/graph/histogram.rb +198 -0
  75. data/lib/statsample-ekatena/graph/scatterplot.rb +213 -0
  76. data/lib/statsample-ekatena/histogram.rb +180 -0
  77. data/lib/statsample-ekatena/matrix.rb +329 -0
  78. data/lib/statsample-ekatena/multiset.rb +310 -0
  79. data/lib/statsample-ekatena/regression.rb +65 -0
  80. data/lib/statsample-ekatena/regression/multiple.rb +89 -0
  81. data/lib/statsample-ekatena/regression/multiple/alglibengine.rb +128 -0
  82. data/lib/statsample-ekatena/regression/multiple/baseengine.rb +251 -0
  83. data/lib/statsample-ekatena/regression/multiple/gslengine.rb +129 -0
  84. data/lib/statsample-ekatena/regression/multiple/matrixengine.rb +205 -0
  85. data/lib/statsample-ekatena/regression/multiple/rubyengine.rb +86 -0
  86. data/lib/statsample-ekatena/regression/simple.rb +121 -0
  87. data/lib/statsample-ekatena/reliability.rb +150 -0
  88. data/lib/statsample-ekatena/reliability/icc.rb +415 -0
  89. data/lib/statsample-ekatena/reliability/multiscaleanalysis.rb +181 -0
  90. data/lib/statsample-ekatena/reliability/scaleanalysis.rb +233 -0
  91. data/lib/statsample-ekatena/reliability/skillscaleanalysis.rb +114 -0
  92. data/lib/statsample-ekatena/resample.rb +15 -0
  93. data/lib/statsample-ekatena/shorthand.rb +125 -0
  94. data/lib/statsample-ekatena/srs.rb +169 -0
  95. data/lib/statsample-ekatena/test.rb +82 -0
  96. data/lib/statsample-ekatena/test/bartlettsphericity.rb +45 -0
  97. data/lib/statsample-ekatena/test/chisquare.rb +73 -0
  98. data/lib/statsample-ekatena/test/f.rb +52 -0
  99. data/lib/statsample-ekatena/test/kolmogorovsmirnov.rb +63 -0
  100. data/lib/statsample-ekatena/test/levene.rb +88 -0
  101. data/lib/statsample-ekatena/test/t.rb +309 -0
  102. data/lib/statsample-ekatena/test/umannwhitney.rb +208 -0
  103. data/lib/statsample-ekatena/test/wilcoxonsignedrank.rb +90 -0
  104. data/lib/statsample-ekatena/vector.rb +19 -0
  105. data/lib/statsample-ekatena/version.rb +3 -0
  106. data/lib/statsample.rb +282 -0
  107. data/po/es/statsample.mo +0 -0
  108. data/po/es/statsample.po +959 -0
  109. data/po/statsample.pot +947 -0
  110. data/references.txt +24 -0
  111. data/statsample-ekatena.gemspec +49 -0
  112. data/test/fixtures/bank2.dat +200 -0
  113. data/test/fixtures/correlation_matrix.rb +17 -0
  114. data/test/fixtures/df.csv +15 -0
  115. data/test/fixtures/hartman_23.matrix +9 -0
  116. data/test/fixtures/stock_data.csv +500 -0
  117. data/test/fixtures/tetmat_matrix.txt +5 -0
  118. data/test/fixtures/tetmat_test.txt +1001 -0
  119. data/test/helpers_tests.rb +83 -0
  120. data/test/test_analysis.rb +176 -0
  121. data/test/test_anova_contrast.rb +36 -0
  122. data/test/test_anovaoneway.rb +26 -0
  123. data/test/test_anovatwoway.rb +37 -0
  124. data/test/test_anovatwowaywithdataset.rb +47 -0
  125. data/test/test_anovawithvectors.rb +102 -0
  126. data/test/test_awesome_print_bug.rb +16 -0
  127. data/test/test_bartlettsphericity.rb +25 -0
  128. data/test/test_bivariate.rb +164 -0
  129. data/test/test_codification.rb +78 -0
  130. data/test/test_crosstab.rb +67 -0
  131. data/test/test_dominance_analysis.rb +39 -0
  132. data/test/test_factor.rb +228 -0
  133. data/test/test_factor_map.rb +38 -0
  134. data/test/test_factor_pa.rb +56 -0
  135. data/test/test_fit_model.rb +88 -0
  136. data/test/test_ggobi.rb +35 -0
  137. data/test/test_gsl.rb +15 -0
  138. data/test/test_histogram.rb +109 -0
  139. data/test/test_matrix.rb +48 -0
  140. data/test/test_multiset.rb +176 -0
  141. data/test/test_regression.rb +231 -0
  142. data/test/test_reliability.rb +223 -0
  143. data/test/test_reliability_icc.rb +198 -0
  144. data/test/test_reliability_skillscale.rb +57 -0
  145. data/test/test_resample.rb +24 -0
  146. data/test/test_srs.rb +9 -0
  147. data/test/test_statistics.rb +69 -0
  148. data/test/test_stest.rb +69 -0
  149. data/test/test_stratified.rb +17 -0
  150. data/test/test_test_f.rb +33 -0
  151. data/test/test_test_kolmogorovsmirnov.rb +34 -0
  152. data/test/test_test_t.rb +62 -0
  153. data/test/test_umannwhitney.rb +27 -0
  154. data/test/test_vector.rb +12 -0
  155. data/test/test_wilcoxonsignedrank.rb +64 -0
  156. metadata +570 -0
@@ -0,0 +1,38 @@
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
+ # require 'rserve'
3
+ # require 'statsample/rserve_extension'
4
+
5
# Checks Statsample::Factor::MAP against a fixed 8x8 correlation matrix,
# once with use_gsl disabled and once with it enabled.
class StatsampleFactorMpaTestCase < Minitest::Test
  context Statsample::Factor::MAP do
    setup do
      m = Matrix[
        [1, 0.846, 0.805, 0.859, 0.473, 0.398, 0.301, 0.382],
        [0.846, 1, 0.881, 0.826, 0.376, 0.326, 0.277, 0.415],
        [0.805, 0.881, 1, 0.801, 0.38, 0.319, 0.237, 0.345],
        [0.859, 0.826, 0.801, 1, 0.436, 0.329, 0.327, 0.365],
        [0.473, 0.376, 0.38, 0.436, 1, 0.762, 0.73, 0.629],
        [0.398, 0.326, 0.319, 0.329, 0.762, 1, 0.583, 0.577],
        [0.301, 0.277, 0.237, 0.327, 0.73, 0.583, 1, 0.539],
        [0.382, 0.415, 0.345, 0.365, 0.629, 0.577, 0.539, 1]
      ]
      @map = Statsample::Factor::MAP.new(m)
    end

    should 'return correct values with pure ruby' do
      @map.use_gsl = false
      map_assertions(@map)
    end

    # NOTE(review): should_with_gsl comes from the test helpers; presumably it
    # only runs the block when GSL is available -- confirm in helpers_tests.rb.
    should_with_gsl 'return correct values with gsl' do
      @map.use_gsl = true
      map_assertions(@map)
    end
  end

  # Shared expectations for both engines.
  #
  # Arguments follow Minitest's (expected, actual) order so that a failure
  # message reports the reference value as "expected".
  #
  # @param map [Statsample::Factor::MAP] analysis built in the setup block
  def map_assertions(map)
    assert_in_delta(0.066445, map.minfm, 0.00001)
    assert_equal(2, map.number_of_factors)
    assert_in_delta(0.312475, map.fm[0], 0.00001)
    assert_in_delta(0.245121, map.fm[1], 0.00001)
  end
end
@@ -0,0 +1,56 @@
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
+ # require 'rserve'
3
+ # require 'statsample/rserve_extension'
4
+
5
# Tests for Statsample::Factor::ParallelAnalysis.
class StatsampleFactorTestCase < Minitest::Test
  include Statsample::Fixtures

  # Based on Hardle and Simar
  def setup
    @fixtures_dir = File.expand_path(File.dirname(__FILE__) + '/fixtures')
  end

  # Compares the first three mean eigenvalues obtained by bootstrapping a
  # generated two-factor data set with those obtained from random data.
  # Only runs when GSL is available; otherwise the test is skipped.
  def test_parallelanalysis_with_data
    return skip('Too slow without GSL') unless Statsample.has_gsl?

    samples = 100
    variables = 10
    iterations = 50
    rng = Distribution::Normal.rng
    f1 = Daru::Vector.new(Array.new(samples) { rng.call })
    f2 = Daru::Vector.new(Array.new(samples) { rng.call })

    vectors = {}
    variables.times do |i|
      # The first five variables weight f1 by 5 and f2 by 2; the rest swap them.
      heavy, light = i < 5 ? [f1, f2] : [f2, f1]
      vectors[:"v#{i}"] = Daru::Vector.new(
        Array.new(samples) { |nv| heavy[nv] * 5 + light[nv] * 2 + rng.call }
      )
    end
    ds = Daru::DataFrame.new(vectors)

    pa1 = Statsample::Factor::ParallelAnalysis.new(ds, bootstrap_method: :data, iterations: iterations)
    pa2 = Statsample::Factor::ParallelAnalysis.with_random_data(samples, variables, iterations: iterations, percentil: 95)
    (1..3).each do |n|
      var = :"ev_0000#{n}"
      assert_in_delta(pa1.ds_eigenvalues[var].mean, pa2.ds_eigenvalues[var].mean, 0.07)
    end
  end

  # Checks the first three mean eigenvalues from random data (305 cases,
  # 8 variables) against reference values and that a summary is produced.
  def test_parallelanalysis
    pa = Statsample::Factor::ParallelAnalysis.with_random_data(305, 8, iterations: 100, percentil: 95)
    reference = [
      [:ev_00001, 1.2454, 0.05],
      [:ev_00002, 1.1542, 0.01],
      [:ev_00003, 1.0836, 0.01]
    ]
    reference.each do |name, expected, delta|
      assert_in_delta(expected, pa.ds_eigenvalues[name].mean, delta)
    end
    assert(pa.summary.size > 0)
  end
end
@@ -0,0 +1,88 @@
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
+ require 'minitest/autorun'
3
+
4
# Specs for Statsample::FitModel: each example hands a formula and the list of
# vector names expected for it to the assert_vectors_from_formula helper.
describe Statsample::FitModel do
  before do
    # Resolve the fixture relative to this file (as the other test files do)
    # so the spec does not depend on the current working directory.
    @df = Daru::DataFrame.from_csv File.expand_path(File.dirname(__FILE__) + '/fixtures/df.csv')
    @df.to_category 'c', 'd', 'e'
  end

  context '#df_for_regression' do
    context 'no interaction' do
      it { assert_vectors_from_formula 'y~a+e', %w[a e_B e_C y] }
    end

    context '2-way interaction' do
      context 'interaction of numerical with numerical' do
        context 'none reoccur' do
          it { assert_vectors_from_formula 'y~a:b', %w[a:b y] }
        end

        context 'one reoccur' do
          it { assert_vectors_from_formula 'y~a+a:b', %w[a a:b y] }
        end

        context 'both reoccur' do
          it { assert_vectors_from_formula 'y~a+b+a:b', %w[a a:b b y] }
        end
      end

      context 'interaction of category with numerical' do
        context 'none reoccur' do
          it { assert_vectors_from_formula 'y~a:e', %w[e_A:a e_B:a e_C:a y] }
        end

        context 'one reoccur' do
          context 'numeric occur' do
            it { assert_vectors_from_formula 'y~a+a:e', %w[a e_B:a e_C:a y] }
          end

          context 'category occur' do
            it do
              assert_vectors_from_formula 'y~e+a:e',
                                          %w[e_B e_C e_A:a e_B:a e_C:a y]
            end
          end
        end

        context 'both reoccur' do
          it do
            assert_vectors_from_formula 'y~a+e+a:e',
                                        %w[a e_B e_C e_B:a e_C:a y]
          end
        end
      end

      context 'interaction of category with category' do
        context 'none reoccur' do
          it do
            assert_vectors_from_formula 'y~c:e',
                                        %w[e_B e_C c_yes:e_A c_yes:e_B c_yes:e_C y]
          end
        end

        context 'one reoccur' do
          it do
            assert_vectors_from_formula 'y~e+c:e',
                                        %w[e_B e_C c_yes:e_A c_yes:e_B c_yes:e_C y]
          end
        end

        context 'both reoccur' do
          it do
            assert_vectors_from_formula 'y~c+e+c:e',
                                        %w[c_yes e_B e_C c_yes:e_B c_yes:e_C y]
          end
        end
      end
    end

    context 'corner case' do
      context 'example 1' do
        it do
          assert_vectors_from_formula 'y~d:a+d:e',
                                      %w[e_B e_C d_male:e_A d_male:e_B d_male:e_C d_female:a d_male:a y]
        end
      end
    end

    context 'complex examples' do
      context 'random example 1' do
        it do
          assert_vectors_from_formula 'y~a+e+c:d+e:d',
                                      %w[e_B e_C d_male c_yes:d_female c_yes:d_male e_B:d_male e_C:d_male a y]
        end
      end

      context 'random example 2' do
        it do
          assert_vectors_from_formula 'y~e+b+c+d:e+b:e+a:e+0',
                                      %w[e_A e_B e_C c_yes d_male:e_A d_male:e_B d_male:e_C b e_B:b e_C:b e_A:a e_B:a e_C:a y]
        end
      end
    end
  end
end
@@ -0,0 +1,35 @@
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
+ require 'ostruct'
3
# Tests for the Statsample::GGobi helpers values_definition and
# variable_definition.
class StatsampleGGobiTestCase < Minitest::Test
  def setup
    v1 = Daru::Vector.new([10.2, 20.3, 10, 20, 30, 40, 30, 20, 30, 40] * 10)
    v3 = Daru::Vector.new([1, 2, 3, 4, 5, 4, 3, 2, 1, 2] * 10)
    @v2 = Daru::Vector.new(%w[a b c a a a b b c d] * 10)
    @v2.labels = { 'a' => 'letter a', 'd' => 'letter d' }
    @ds = Daru::DataFrame.new({ v1: v1, v2: @v2, v3: v3 })
  end

  # nil entries are rendered with the supplied missing-value marker.
  def test_values_definition
    values = [1.0, 2, 'a', nil]
    assert_equal('1.0 2 a NA', Statsample::GGobi.values_definition(values, 'NA'))
  end

  # variable_definition returns the XML for the variable and records the
  # level mapping and the variable name on the carrier object.
  def test_variable_definition
    carrier = OpenStruct.new
    carrier.categorials = []
    carrier.conversions = {}
    real_var_definition = Statsample::GGobi.variable_definition(carrier, @v2, 'variable 2', 'v2')
    # NOTE(review): the comparison below maps each whitespace character to a
    # single space without collapsing runs, so the heredoc's indentation is
    # significant. It is kept at column 0 -- confirm against the generator.
    expected = <<-EOS
<categoricalvariable name="variable 2" nickname="v2">
<levels count="4">
<level value="1">letter a</level>
<level value="2">b</level>
<level value="3">c</level>
<level value="4">letter d</level></levels>
</categoricalvariable>
    EOS
    flatten = ->(text) { text.gsub(/\s/, ' ') }
    assert_equal(flatten.call(expected), flatten.call(real_var_definition))
    assert_equal({ 'variable 2' => { 'a' => 1, 'b' => 2, 'c' => 3, 'd' => 4 } }, carrier.conversions)
    assert_equal(['variable 2'], carrier.categorials)
  end
end
@@ -0,0 +1,15 @@
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
# Round-trips a 5x3 data frame through a matrix, a GSL matrix and back,
# checking the dimensions at each step.
class StatsampleGSLTestCase < Minitest::Test
  should_with_gsl 'matrix with gsl' do
    columns = {
      a: Daru::Vector.new([1, 2, 3, 4, 20]),
      b: Daru::Vector.new([3, 2, 3, 4, 50]),
      c: Daru::Vector.new([6, 2, 3, 4, 3])
    }
    gsl = Daru::DataFrame.new(columns).to_matrix.to_gsl
    assert_equal(5, gsl.size1)
    assert_equal(3, gsl.size2)

    matrix = gsl.to_matrix
    assert_equal(5, matrix.row_size)
    assert_equal(3, matrix.column_size)
  end
end
@@ -0,0 +1,109 @@
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
+
3
# Tests for Statsample::Histogram: allocation, incrementing, range queries
# and summary statistics, plus a smoke test for Statsample::Graph::Histogram.
class StatsampleHistogramTestCase < Minitest::Test
  context Statsample::Histogram do
    should 'alloc correctly with integer' do
      h = Statsample::Histogram.alloc(4)
      assert_equal([0.0] * 4, h.bin)
      assert_equal([0.0] * 5, h.range)
    end

    should 'alloc correctly with array' do
      h = Statsample::Histogram.alloc([1, 3, 7, 9, 20])
      assert_equal([0.0] * 4, h.bin)
      assert_equal([1, 3, 7, 9, 20], h.range)
    end

    should 'alloc correctly with integer and min, max array' do
      h = Statsample::Histogram.alloc(5, [0, 5])
      assert_equal([0.0, 1.0, 2.0, 3.0, 4.0, 5.0], h.range)
      assert_equal([0.0] * 5, h.bin)
    end

    should 'bin() method return correct number of bins' do
      h = Statsample::Histogram.alloc(4)
      assert_equal(4, h.bins)
    end

    should 'increment correctly' do
      h = Statsample::Histogram.alloc(5, [0, 5])
      h.increment 2.5
      assert_equal([0.0, 0.0, 1.0, 0.0, 0.0], h.bin)
      h.increment [0.5, 0.5, 3.5, 3.5]
      assert_equal([2.0, 0.0, 1.0, 2.0, 0.0], h.bin)
      h.increment 0
      assert_equal([3.0, 0.0, 1.0, 2.0, 0.0], h.bin)
      # A value equal to the upper bound leaves every bin unchanged.
      h.increment 5
      assert_equal([3.0, 0.0, 1.0, 2.0, 0.0], h.bin)
    end

    should 'alloc_uniform correctly with n, min,max' do
      h = Statsample::Histogram.alloc_uniform(5, 0, 10)
      assert_equal(5, h.bins)
      assert_equal([0.0] * 5, h.bin)
      assert_equal([0.0, 2.0, 4.0, 6.0, 8.0, 10.0], h.range)
    end

    should 'alloc_uniform correctly with n, [min,max]' do
      h = Statsample::Histogram.alloc_uniform(5, [0, 10])
      assert_equal(5, h.bins)
      assert_equal([0.0] * 5, h.bin)
      assert_equal([0.0, 2.0, 4.0, 6.0, 8.0, 10.0], h.range)
    end

    should 'get_range()' do
      h = Statsample::Histogram.alloc_uniform(5, 2, 12)
      5.times do |i|
        assert_equal([2 + i * 2, 4 + i * 2], h.get_range(i))
      end
    end

    should 'min() and max()' do
      h = Statsample::Histogram.alloc_uniform(5, 2, 12)
      assert_equal(2, h.min)
      assert_equal(12, h.max)
    end

    should 'max_val()' do
      h = Statsample::Histogram.alloc(5, [0, 5])
      100.times { h.increment(rand * 5) }
      # Largest of the five bin counts, read bin by bin.
      largest = (0..4).map { |i| h.bin[i] }.max
      assert_equal(largest, h.max_val)
    end

    should 'min_val()' do
      h = Statsample::Histogram.alloc(5, [0, 5])
      100.times { h.increment(rand * 5) }
      # Smallest of the five bin counts, read bin by bin.
      smallest = (0..4).map { |i| h.bin[i] }.min
      assert_equal(smallest, h.min_val)
    end

    should 'return correct estimated mean' do
      a = Daru::Vector.new([1.5, 1.5, 1.5, 3.5, 3.5, 3.5])
      h = Statsample::Histogram.alloc(5, [0, 5])
      h.increment(a)
      assert_equal(2.5, h.estimated_mean)
    end

    should 'return correct estimated standard deviation' do
      a = Daru::Vector.new([0.5, 1.5, 1.5, 1.5, 2.5, 3.5, 3.5, 3.5, 4.5])
      h = Statsample::Histogram.alloc(5, [0, 5])
      h.increment(a)
      assert_equal(a.sd, h.estimated_standard_deviation)
    end

    should 'return correct sum for all values' do
      h = Statsample::Histogram.alloc(5, [0, 5])
      n = rand(100)
      n.times { h.increment(1) }
      assert_equal(n, h.sum)
    end

    should 'return correct sum for a subset of values' do
      h = Statsample::Histogram.alloc(5, [0, 5])
      h.increment([0.5, 2.5, 4.5])
      assert_equal(1, h.sum(0, 1))
      assert_equal(2, h.sum(1, 4))
    end

    should 'not raise exception when all values equal' do
      assert_nothing_raised do
        a = Daru::Vector.new([5, 5, 5, 5, 5, 5])
        h = Statsample::Graph::Histogram.new(a)
        h.to_svg
      end
    end
  end
end
@@ -0,0 +1,48 @@
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
+
3
# Tests for the Statsample::NamedMatrix and Statsample::CovariateMatrix
# extensions of Matrix.
class StatsampleMatrixTestCase < Minitest::Test
  # A named matrix converted with to_dataframe must yield the same vectors
  # as a data frame built directly from the matrix columns.
  def test_to_dataset
    m = Matrix[[1, 4], [2, 5], [3, 6]]
    m.extend Statsample::NamedMatrix
    m.fields_y = [:x1, :x2]
    m.name = 'test'
    x1 = Daru::Vector.new([1, 2, 3])
    x2 = Daru::Vector.new([4, 5, 6])
    ds = Daru::DataFrame.new({ :x1 => x1, :x2 => x2 })
    ds.rename 'test'
    obs = m.to_dataframe
    assert_equal(ds[:x1], obs[:x1])
    assert_equal(ds[:x2], obs[:x2])
    assert_equal(ds[:x1].mean, obs[:x1].mean)
  end

  # Checks the _type reported for correlation and covariance matrices,
  # submatrix extraction by field name, and that the correlation derived from
  # a covariance matrix matches the directly computed correlation matrix.
  def test_covariate
    corr_m = Matrix[[1.0, 0.3, 0.2], [0.3, 1.0, 0.5], [0.2, 0.5, 1.0]]
    corr_m.extend Statsample::CovariateMatrix
    corr_m.fields = %w(a b c)
    assert_equal(:correlation, corr_m._type)

    assert_equal(Matrix[[0.5], [0.3]], corr_m.submatrix(%w(c a), %w(b)))
    assert_equal(Matrix[[1.0, 0.2], [0.2, 1.0]], corr_m.submatrix(%w(c a)))
    assert_equal(:correlation, corr_m.submatrix(%w(c a))._type)

    cov_m = Matrix[[20, 30, 10], [30, 60, 50], [10, 50, 50]]
    cov_m.extend Statsample::CovariateMatrix
    assert_equal(:covariance, cov_m._type)

    a = Daru::Vector.new(50.times.collect { rand })
    b = Daru::Vector.new(50.times.collect { rand })
    c = Daru::Vector.new(50.times.collect { rand })
    ds = Daru::DataFrame.new({ :a => a, :b => b, :c => c })
    corr = Statsample::Bivariate.correlation_matrix(ds)
    real = Statsample::Bivariate.covariance_matrix(ds).correlation
    corr.row_size.times do |i|
      corr.column_size.times do |j|
        assert_in_delta(corr[i, j], real[i, j], 1e-15)
      end
    end
  end
end
@@ -0,0 +1,176 @@
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
+
3
# Tests for Statsample::Multiset (creation, splitting a data frame into a
# multiset, iteration and union) and Statsample::StratifiedSample.
class StatsampleMultisetTestCase < Minitest::Test
  # Builds an 8-row data frame and splits it on :x into the 'a' and 'b' sets.
  def setup
    @x = Daru::Vector.new(%w(a a a a b b b b))
    @y = Daru::Vector.new([1, 2, 3, 4, 5, 6, 7, 8])
    @z = Daru::Vector.new([10, 11, 12, 13, 14, 15, 16, 17])
    @ds = Daru::DataFrame.new({ :x => @x, :y => @y, :z => @z })
    @ms = @ds.to_multiset_by_split(:x)
  end

  def test_creation
    v1a = Daru::Vector.new([1, 2, 3, 4, 5])
    v2a = Daru::Vector.new([11, 21, 31, 41, 51])
    v3a = Daru::Vector.new([21, 23, 34, 45, 56])
    ds1 = Daru::DataFrame.new({ :v1 => v1a, :v2 => v2a, :v3 => v3a })
    v1b = Daru::Vector.new([15, 25, 35, 45, 55])
    v2b = Daru::Vector.new([11, 21, 31, 41, 51])
    v3b = Daru::Vector.new([21, 23, 34, 45, 56])
    ds2 = Daru::DataFrame.new({ :v1 => v1b, :v2 => v2b, :v3 => v3b })
    ms = Statsample::Multiset.new([:v1, :v2, :v3])
    ms.add_dataset(:ds1, ds1)
    ms.add_dataset(:ds2, ds2)
    assert_equal(ds1, ms[:ds1])
    assert_equal(ds2, ms[:ds2])
    assert_equal(v1a, ms[:ds1][:v1])
    assert_not_equal(v1b, ms[:ds1][:v1])
    ds3 = Daru::DataFrame.new({ :v1 => v1b, :v2 => v2b })
    # Pass a name as well as the data frame, matching the two-argument calls
    # above, so the ArgumentError asserted here cannot come merely from a
    # wrong number of arguments. ds3 lacks the :v3 field.
    assert_raise ArgumentError do
      ms.add_dataset(:ds3, ds3)
    end
  end

  def test_creation_empty
    ms = Statsample::Multiset.new_empty_vectors([:id, :age, :name], [:male, :female])
    ds_male = Daru::DataFrame.new({
      :id => Daru::Vector.new([]),
      :age => Daru::Vector.new([]),
      :name => Daru::Vector.new([])
    }, order: [:id, :age, :name])

    ds_female = Daru::DataFrame.new({
      :id => Daru::Vector.new([]),
      :age => Daru::Vector.new([]),
      :name => Daru::Vector.new([])
    }, order: [:id, :age, :name])

    ms2 = Statsample::Multiset.new([:id, :age, :name])
    ms2.add_dataset(:male, ds_male)
    ms2.add_dataset(:female, ds_female)
    assert_equal(ms2.fields, ms.fields)
    assert_equal(ms2[:male], ms[:male])
    assert_equal(ms2[:female], ms[:female])
  end

  def test_to_multiset_by_split_one
    sex = Daru::Vector.new(%w(m m m m m f f f f m))
    city = Daru::Vector.new(%w(London Paris NY London Paris NY London Paris NY Tome))
    age = Daru::Vector.new([10, 10, 20, 30, 34, 34, 33, 35, 36, 40])
    ds = Daru::DataFrame.new({ :sex => sex, :city => city, :age => age })
    ms = ds.to_multiset_by_split(:sex)
    assert_equal(2, ms.n_datasets)
    assert_equal(%w(f m), ms.datasets.keys.sort)
    assert_equal(6, ms['m'].nrows)
    assert_equal(4, ms['f'].nrows)
    assert_equal(%w(London Paris NY London Paris Tome), ms['m'][:city].to_a)
    assert_equal([34, 33, 35, 36], ms['f'][:age].to_a)
  end

  def test_to_multiset_by_split_multiple
    sex = Daru::Vector.new(%w(m m m m m m m m m m f f f f f f f f f f))
    city = Daru::Vector.new(%w(London London London Paris Paris London London London Paris Paris London London London Paris Paris London London London Paris Paris))
    hair = Daru::Vector.new(%w(blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black))
    age = Daru::Vector.new([10, 10, 20, 30, 34, 34, 33, 35, 36, 40, 10, 10, 20, 30, 34, 34, 33, 35, 36, 40])
    ds = Daru::DataFrame.new({
      :sex => sex, :city => city, :hair => hair, :age => age
    }, order: [:sex, :city, :hair, :age])
    ms = ds.to_multiset_by_split(:sex, :city, :hair)
    assert_equal(8, ms.n_datasets)
    # Row counts per stratum, counted from the vectors above.
    assert_equal(3, ms[%w(m London blonde)].nrows)
    assert_equal(3, ms[%w(m London black)].nrows)
    assert_equal(1, ms[%w(m Paris black)].nrows)
  end

  def test_stratum_proportion
    ds1 = Daru::DataFrame.new({ :q1 => Daru::Vector.new([1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0]) })
    ds2 = Daru::DataFrame.new({ :q1 => Daru::Vector.new([1, 1, 1, 1, 1, 1, 1, 0, 0]) })
    assert_equal(5.0 / 12, ds1[:q1].proportion)
    assert_equal(7.0 / 9, ds2[:q1].proportion)
    ms = Statsample::Multiset.new([:q1])
    ms.add_dataset(:d1, ds1)
    ms.add_dataset(:d2, ds2)
    ss = Statsample::StratifiedSample.new(ms, :d1 => 50, :d2 => 100)
    assert_in_delta(0.655, ss.proportion(:q1), 0.01)
    assert_in_delta(0.345, ss.proportion(:q1, 0), 0.01)
  end

  def test_stratum_scale
    boys = Daru::DataFrame.new({ :test => Daru::Vector.new([50, 55, 60, 62, 62, 65, 67, 67, 70, 70, 73, 73, 75, 78, 78, 80, 85, 90]) })
    girls = Daru::DataFrame.new({ :test => Daru::Vector.new([70, 70, 72, 72, 75, 75, 78, 78, 80, 80, 82, 82, 85, 85, 88, 88, 90, 90]) })
    ms = Statsample::Multiset.new([:test])
    ms.add_dataset(:boys, boys)
    ms.add_dataset(:girls, girls)
    ss = Statsample::StratifiedSample.new(ms, :boys => 10_000, :girls => 10_000)
    assert_equal(2, ss.strata_number)
    assert_equal(20_000, ss.population_size)
    assert_equal(10_000, ss.stratum_size(:boys))
    assert_equal(10_000, ss.stratum_size(:girls))
    assert_equal(36, ss.sample_size)
    assert_equal(75, ss.mean(:test))
    assert_in_delta(1.45, ss.standard_error_wor(:test), 0.01)
    assert_in_delta(ss.standard_error_wor(:test), ss.standard_error_wor_2(:test), 0.00001)
  end

  # Iterating the multiset yields each split key with its data frame.
  def test_each
    xpe = {
      'a' => Daru::Vector.new(%w(a a a a)),
      'b' => Daru::Vector.new(%w(b b b b))
    }
    ype = {
      'a' => Daru::Vector.new([1, 2, 3, 4]),
      'b' => Daru::Vector.new([5, 6, 7, 8])
    }
    zpe = {
      'a' => Daru::Vector.new([10, 11, 12, 13]),
      'b' => Daru::Vector.new([14, 15, 16, 17])
    }
    xp, yp, zp = {}, {}, {}
    @ms.each do |k, ds|
      xp[k] = ds[:x]
      yp[k] = ds[:y]
      zp[k] = ds[:z]
    end
    assert_equal(xpe, xp)
    assert_equal(ype, yp)
    assert_equal(zpe, zp)
  end

  # union with a block: each data set is recoded inside the block and the
  # combined frame must contain the recoded values.
  def test_multiset_union_with_block
    r1 = rand
    r2 = rand
    ye = Daru::Vector.new([1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2])
    ze = Daru::Vector.new([10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2])

    ds2 = @ms.union do |k, ds|
      ds[:y].recode! { |v| k == 'a' ? v * r1 : v * r2 }
      ds[:z].recode! { |v| k == 'a' ? v * r1 : v * r2 }
    end
    assert_equal(ye, ds2[:y])
    assert_equal(ze, ds2[:z])
  end

  # union without a block: data sets are recoded first via each, then joined.
  def test_multiset_union
    r1 = rand
    r2 = rand
    ye = Daru::Vector.new([1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2])
    ze = Daru::Vector.new([10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2])

    @ms.each do |k, ds|
      ds[:y].recode! { |v| k == 'a' ? v * r1 : v * r2 }
      ds[:z].recode! { |v| k == 'a' ? v * r1 : v * r2 }
    end
    ds2 = @ms.union
    assert_equal(ye, ds2[:y])
    assert_equal(ze, ds2[:z])
  end
end