RubyGems - statsample-ekatena - Versions diffs - 2.0.2 - Mend

statsample-ekatena 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (156) hide show

checksums.yaml +7 -0
data/.gitignore +15 -0
data/.travis.yml +23 -0
data/CONTRIBUTING.md +17 -0
data/Gemfile +2 -0
data/History.txt +457 -0
data/LICENSE.txt +12 -0
data/README.md +175 -0
data/Rakefile +44 -0
data/benchmarks/correlation_matrix_15_variables.rb +32 -0
data/benchmarks/correlation_matrix_5_variables.rb +33 -0
data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +71 -0
data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
data/benchmarks/correlation_matrix_methods/results.ds +0 -0
data/benchmarks/factor_map.rb +37 -0
data/benchmarks/helpers_benchmark.rb +5 -0
data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
data/doc_latex/manual/equations.tex +78 -0
data/examples/boxplot.rb +28 -0
data/examples/chisquare_test.rb +23 -0
data/examples/correlation_matrix.rb +32 -0
data/examples/dataset.rb +30 -0
data/examples/dominance_analysis.rb +33 -0
data/examples/dominance_analysis_bootstrap.rb +32 -0
data/examples/histogram.rb +26 -0
data/examples/icc.rb +24 -0
data/examples/levene.rb +29 -0
data/examples/multiple_regression.rb +20 -0
data/examples/multivariate_correlation.rb +33 -0
data/examples/parallel_analysis.rb +40 -0
data/examples/polychoric.rb +40 -0
data/examples/principal_axis.rb +26 -0
data/examples/reliability.rb +31 -0
data/examples/scatterplot.rb +25 -0
data/examples/t_test.rb +27 -0
data/examples/tetrachoric.rb +17 -0
data/examples/u_test.rb +24 -0
data/examples/vector.rb +20 -0
data/examples/velicer_map_test.rb +46 -0
data/grab_references.rb +29 -0
data/lib/spss.rb +134 -0
data/lib/statsample-ekatena/analysis.rb +100 -0
data/lib/statsample-ekatena/analysis/suite.rb +89 -0
data/lib/statsample-ekatena/analysis/suitereportbuilder.rb +44 -0
data/lib/statsample-ekatena/anova.rb +24 -0
data/lib/statsample-ekatena/anova/contrast.rb +79 -0
data/lib/statsample-ekatena/anova/oneway.rb +187 -0
data/lib/statsample-ekatena/anova/twoway.rb +207 -0
data/lib/statsample-ekatena/bivariate.rb +406 -0
data/lib/statsample-ekatena/bivariate/pearson.rb +54 -0
data/lib/statsample-ekatena/codification.rb +182 -0
data/lib/statsample-ekatena/converter/csv.rb +28 -0
data/lib/statsample-ekatena/converter/spss.rb +48 -0
data/lib/statsample-ekatena/converters.rb +211 -0
data/lib/statsample-ekatena/crosstab.rb +188 -0
data/lib/statsample-ekatena/daru.rb +115 -0
data/lib/statsample-ekatena/dataset.rb +10 -0
data/lib/statsample-ekatena/dominanceanalysis.rb +425 -0
data/lib/statsample-ekatena/dominanceanalysis/bootstrap.rb +232 -0
data/lib/statsample-ekatena/factor.rb +104 -0
data/lib/statsample-ekatena/factor/map.rb +124 -0
data/lib/statsample-ekatena/factor/parallelanalysis.rb +166 -0
data/lib/statsample-ekatena/factor/pca.rb +242 -0
data/lib/statsample-ekatena/factor/principalaxis.rb +243 -0
data/lib/statsample-ekatena/factor/rotation.rb +198 -0
data/lib/statsample-ekatena/formula/fit_model.rb +46 -0
data/lib/statsample-ekatena/formula/formula.rb +306 -0
data/lib/statsample-ekatena/graph.rb +11 -0
data/lib/statsample-ekatena/graph/boxplot.rb +236 -0
data/lib/statsample-ekatena/graph/histogram.rb +198 -0
data/lib/statsample-ekatena/graph/scatterplot.rb +213 -0
data/lib/statsample-ekatena/histogram.rb +180 -0
data/lib/statsample-ekatena/matrix.rb +329 -0
data/lib/statsample-ekatena/multiset.rb +310 -0
data/lib/statsample-ekatena/regression.rb +65 -0
data/lib/statsample-ekatena/regression/multiple.rb +89 -0
data/lib/statsample-ekatena/regression/multiple/alglibengine.rb +128 -0
data/lib/statsample-ekatena/regression/multiple/baseengine.rb +251 -0
data/lib/statsample-ekatena/regression/multiple/gslengine.rb +129 -0
data/lib/statsample-ekatena/regression/multiple/matrixengine.rb +205 -0
data/lib/statsample-ekatena/regression/multiple/rubyengine.rb +86 -0
data/lib/statsample-ekatena/regression/simple.rb +121 -0
data/lib/statsample-ekatena/reliability.rb +150 -0
data/lib/statsample-ekatena/reliability/icc.rb +415 -0
data/lib/statsample-ekatena/reliability/multiscaleanalysis.rb +181 -0
data/lib/statsample-ekatena/reliability/scaleanalysis.rb +233 -0
data/lib/statsample-ekatena/reliability/skillscaleanalysis.rb +114 -0
data/lib/statsample-ekatena/resample.rb +15 -0
data/lib/statsample-ekatena/shorthand.rb +125 -0
data/lib/statsample-ekatena/srs.rb +169 -0
data/lib/statsample-ekatena/test.rb +82 -0
data/lib/statsample-ekatena/test/bartlettsphericity.rb +45 -0
data/lib/statsample-ekatena/test/chisquare.rb +73 -0
data/lib/statsample-ekatena/test/f.rb +52 -0
data/lib/statsample-ekatena/test/kolmogorovsmirnov.rb +63 -0
data/lib/statsample-ekatena/test/levene.rb +88 -0
data/lib/statsample-ekatena/test/t.rb +309 -0
data/lib/statsample-ekatena/test/umannwhitney.rb +208 -0
data/lib/statsample-ekatena/test/wilcoxonsignedrank.rb +90 -0
data/lib/statsample-ekatena/vector.rb +19 -0
data/lib/statsample-ekatena/version.rb +3 -0
data/lib/statsample.rb +282 -0
data/po/es/statsample.mo +0 -0
data/po/es/statsample.po +959 -0
data/po/statsample.pot +947 -0
data/references.txt +24 -0
data/statsample-ekatena.gemspec +49 -0
data/test/fixtures/bank2.dat +200 -0
data/test/fixtures/correlation_matrix.rb +17 -0
data/test/fixtures/df.csv +15 -0
data/test/fixtures/hartman_23.matrix +9 -0
data/test/fixtures/stock_data.csv +500 -0
data/test/fixtures/tetmat_matrix.txt +5 -0
data/test/fixtures/tetmat_test.txt +1001 -0
data/test/helpers_tests.rb +83 -0
data/test/test_analysis.rb +176 -0
data/test/test_anova_contrast.rb +36 -0
data/test/test_anovaoneway.rb +26 -0
data/test/test_anovatwoway.rb +37 -0
data/test/test_anovatwowaywithdataset.rb +47 -0
data/test/test_anovawithvectors.rb +102 -0
data/test/test_awesome_print_bug.rb +16 -0
data/test/test_bartlettsphericity.rb +25 -0
data/test/test_bivariate.rb +164 -0
data/test/test_codification.rb +78 -0
data/test/test_crosstab.rb +67 -0
data/test/test_dominance_analysis.rb +39 -0
data/test/test_factor.rb +228 -0
data/test/test_factor_map.rb +38 -0
data/test/test_factor_pa.rb +56 -0
data/test/test_fit_model.rb +88 -0
data/test/test_ggobi.rb +35 -0
data/test/test_gsl.rb +15 -0
data/test/test_histogram.rb +109 -0
data/test/test_matrix.rb +48 -0
data/test/test_multiset.rb +176 -0
data/test/test_regression.rb +231 -0
data/test/test_reliability.rb +223 -0
data/test/test_reliability_icc.rb +198 -0
data/test/test_reliability_skillscale.rb +57 -0
data/test/test_resample.rb +24 -0
data/test/test_srs.rb +9 -0
data/test/test_statistics.rb +69 -0
data/test/test_stest.rb +69 -0
data/test/test_stratified.rb +17 -0
data/test/test_test_f.rb +33 -0
data/test/test_test_kolmogorovsmirnov.rb +34 -0
data/test/test_test_t.rb +62 -0
data/test/test_umannwhitney.rb +27 -0
data/test/test_vector.rb +12 -0
data/test/test_wilcoxonsignedrank.rb +64 -0
metadata +570 -0

data/test/test_factor_map.rb ADDED

@@ -0,0 +1,38 @@
+require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
+# require 'rserve'
+# require 'statsample/rserve_extension'
+class StatsampleFactorMpaTestCase < Minitest::Test
+  context Statsample::Factor::MAP do
+    setup do
+      m = Matrix[
+            [1, 0.846, 0.805, 0.859, 0.473, 0.398, 0.301, 0.382],
+            [0.846, 1, 0.881, 0.826, 0.376, 0.326, 0.277, 0.415],
+            [0.805, 0.881, 1, 0.801, 0.38, 0.319, 0.237, 0.345],
+            [0.859, 0.826, 0.801, 1, 0.436, 0.329, 0.327, 0.365],
+            [0.473, 0.376, 0.38, 0.436, 1, 0.762, 0.73, 0.629],
+            [0.398, 0.326, 0.319, 0.329, 0.762, 1, 0.583, 0.577],
+            [0.301, 0.277, 0.237, 0.327, 0.73, 0.583, 1, 0.539],
+            [0.382, 0.415, 0.345, 0.365, 0.629, 0.577, 0.539, 1]
+      ]
+      @map = Statsample::Factor::MAP.new(m)
+    end
+    should 'return correct values with pure ruby' do
+      @map.use_gsl = false
+      map_assertions(@map)
+    end
+    should_with_gsl 'return correct values with gsl' do
+      # require 'ruby-prof'
+      @map.use_gsl = true
+      map_assertions(@map)
+    end
+  end
+  def map_assertions(map)
+    assert_in_delta(map.minfm, 0.066445, 0.00001)
+    assert_equal(map.number_of_factors, 2)
+    assert_in_delta(map.fm[0], 0.312475, 0.00001)
+    assert_in_delta(map.fm[1], 0.245121, 0.00001)
+    end
+end

data/test/test_factor_pa.rb ADDED

@@ -0,0 +1,56 @@
+require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
+# require 'rserve'
+# require 'statsample/rserve_extension'
+class StatsampleFactorTestCase < Minitest::Test
+  include Statsample::Fixtures
+  # Based on Hardle and Simar
+  def setup
+    @fixtures_dir = File.expand_path(File.dirname(__FILE__) + '/fixtures')
+  end
+  def test_parallelanalysis_with_data
+    if Statsample.has_gsl?
+      samples = 100
+      variables = 10
+      iterations = 50
+      rng = Distribution::Normal.rng
+      f1 = Daru::Vector.new(samples.times.collect { rng.call })
+      f2 = Daru::Vector.new(samples.times.collect { rng.call })
+      vectors = {}
+      variables.times do |i|
+        if i < 5
+          vectors["v#{i}".to_sym] = Daru::Vector.new(
+            samples.times.collect { |nv|
+              f1[nv] * 5 + f2[nv] * 2 + rng.call
+            }
+          )
+        else
+          vectors["v#{i}".to_sym] =  Daru::Vector.new(
+            samples.times.collect { |nv|
+              f2[nv] * 5 + f1[nv] * 2 + rng.call
+            }
+          )
+        end
+      end
+      ds = Daru::DataFrame.new(vectors)
+      pa1 = Statsample::Factor::ParallelAnalysis.new(ds, bootstrap_method: :data, iterations: iterations)
+      pa2 = Statsample::Factor::ParallelAnalysis.with_random_data(samples, variables, iterations: iterations, percentil: 95)
+      3.times do |n|
+        var = "ev_0000#{n + 1}".to_sym
+        assert_in_delta(pa1.ds_eigenvalues[var].mean, pa2.ds_eigenvalues[var].mean, 0.07)
+      end
+    else
+      skip('Too slow without GSL')
+    end
+  end
+  def test_parallelanalysis
+    pa = Statsample::Factor::ParallelAnalysis.with_random_data(305, 8, iterations: 100, percentil: 95)
+    assert_in_delta(1.2454, pa.ds_eigenvalues[:ev_00001].mean, 0.05)
+    assert_in_delta(1.1542, pa.ds_eigenvalues[:ev_00002].mean, 0.01)
+    assert_in_delta(1.0836, pa.ds_eigenvalues[:ev_00003].mean, 0.01)
+    assert(pa.summary.size > 0)
+  end
+end

data/test/test_fit_model.rb ADDED

@@ -0,0 +1,88 @@
+require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
+require 'minitest/autorun'
+describe Statsample::FitModel do
+  before do
+    @df = Daru::DataFrame.from_csv 'test/fixtures/df.csv'
+    @df.to_category 'c', 'd', 'e'
+  end
+  context '#df_for_regression' do
+    context 'no interaction' do
+      it { assert_vectors_from_formula 'y~a+e', %w[a e_B e_C y] }
+    end
+    context '2-way interaction' do
+      context 'interaction of numerical with numerical' do
+        context 'none reoccur' do
+          it { assert_vectors_from_formula 'y~a:b', %w[a:b y] }
+        end
+        context 'one reoccur' do
+          it { assert_vectors_from_formula 'y~a+a:b', %w[a a:b y] }
+        end
+        context 'both reoccur' do
+          it { assert_vectors_from_formula 'y~a+b+a:b', %w[a a:b b y] }
+        end
+      end
+      context 'interaction of category with numerical' do
+        context 'none reoccur' do
+          it { assert_vectors_from_formula 'y~a:e', %w[e_A:a e_B:a e_C:a y] }
+        end
+        context 'one reoccur' do
+          context 'numeric occur' do
+            it { assert_vectors_from_formula 'y~a+a:e', %w[a e_B:a e_C:a y] }
+          end
+          context 'category occur' do
+            it { assert_vectors_from_formula 'y~e+a:e',
+              %w[e_B e_C e_A:a e_B:a e_C:a y] }
+          end
+        end
+        context 'both reoccur' do
+          it { assert_vectors_from_formula 'y~a+e+a:e',
+            %w[a e_B e_C e_B:a e_C:a y] }
+        end
+      end
+      context 'interaction of category with category' do
+        context 'none reoccur' do
+          it { assert_vectors_from_formula 'y~c:e',
+            %w[e_B e_C c_yes:e_A c_yes:e_B c_yes:e_C y] }
+        end
+        context 'one reoccur' do
+          it { assert_vectors_from_formula 'y~e+c:e',
+            %w[e_B e_C c_yes:e_A c_yes:e_B c_yes:e_C y] }
+        end
+        context 'both reoccur' do
+          it { assert_vectors_from_formula 'y~c+e+c:e',
+            %w[c_yes e_B e_C c_yes:e_B c_yes:e_C y] }
+        end
+      end
+    end
+    context 'corner case' do
+      context 'example 1' do
+        it { assert_vectors_from_formula 'y~d:a+d:e',
+          %w[e_B e_C d_male:e_A d_male:e_B d_male:e_C d_female:a d_male:a y] }
+      end
+    end
+    context 'complex examples' do
+      context 'random example 1' do
+        it { assert_vectors_from_formula 'y~a+e+c:d+e:d',
+          %w[e_B e_C d_male c_yes:d_female c_yes:d_male e_B:d_male e_C:d_male a y] }
+      end
+      context 'random example 2' do
+        it { assert_vectors_from_formula 'y~e+b+c+d:e+b:e+a:e+0',
+          %w[e_A e_B e_C c_yes d_male:e_A d_male:e_B d_male:e_C b e_B:b e_C:b e_A:a e_B:a e_C:a y] }
+      end
+    end
+  end
+end

data/test/test_ggobi.rb ADDED

@@ -0,0 +1,35 @@
+require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
+require 'ostruct'
+class StatsampleGGobiTestCase < Minitest::Test
+  def setup
+    v1  = Daru::Vector.new([10.2, 20.3, 10, 20, 30, 40, 30, 20, 30, 40] * 10)
+    @v2 = Daru::Vector.new(%w(a b c a a a b b c d) * 10)
+    @v2.labels = { 'a' => 'letter a', 'd' => 'letter d' }
+    v3  = Daru::Vector.new([1, 2, 3, 4, 5, 4, 3, 2, 1, 2] * 10)
+    @ds = Daru::DataFrame.new({ :v1 => v1, :v2 => @v2, :v3 => v3 })
+  end
+  def test_values_definition
+    a = [1.0, 2, 'a', nil]
+    assert_equal('1.0 2 a NA', Statsample::GGobi.values_definition(a, 'NA'))
+  end
+  def test_variable_definition
+    carrier = OpenStruct.new
+    carrier.categorials = []
+    carrier.conversions = {}
+    real_var_definition = Statsample::GGobi.variable_definition(carrier, @v2, 'variable 2', 'v2')
+    expected = <<-EOS
+<categoricalvariable name="variable 2" nickname="v2">
+<levels count="4">
+<level value="1">letter a</level>
+<level value="2">b</level>
+<level value="3">c</level>
+<level value="4">letter d</level></levels>
+</categoricalvariable>
+    EOS
+    assert_equal(expected.gsub(/\s/, ' '), real_var_definition.gsub(/\s/, ' '))
+    assert_equal({ 'variable 2' => { 'a' => 1, 'b' => 2, 'c' => 3, 'd' => 4 } }, carrier.conversions)
+    assert_equal(['variable 2'], carrier.categorials)
+  end
+end

data/test/test_gsl.rb ADDED

@@ -0,0 +1,15 @@
+require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
+class StatsampleGSLTestCase < Minitest::Test
+  should_with_gsl 'matrix with gsl' do
+    a = Daru::Vector.new([1, 2, 3, 4, 20])
+    b = Daru::Vector.new([3, 2, 3, 4, 50])
+    c = Daru::Vector.new([6, 2, 3, 4, 3])
+    ds = Daru::DataFrame.new({ :a => a, :b => b, :c => c })
+    gsl = ds.to_matrix.to_gsl
+    assert_equal(5, gsl.size1)
+    assert_equal(3, gsl.size2)
+    matrix = gsl.to_matrix
+    assert_equal(5, matrix.row_size)
+    assert_equal(3, matrix.column_size)
+  end
+end

data/test/test_histogram.rb ADDED

@@ -0,0 +1,109 @@
+require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
+class StatsampleHistogramTestCase < Minitest::Test
+  context Statsample::Histogram do
+    should 'alloc correctly with integer' do
+      h = Statsample::Histogram.alloc(4)
+      assert_equal([0.0] * 4, h.bin)
+      assert_equal([0.0] * 5, h.range)
+    end
+    should 'alloc correctly with array' do
+      h = Statsample::Histogram.alloc([1, 3, 7, 9, 20])
+      assert_equal([0.0] * 4, h.bin)
+      assert_equal([1, 3, 7, 9, 20], h.range)
+    end
+    should 'alloc correctly with integer and min, max array' do
+      h = Statsample::Histogram.alloc(5, [0, 5])
+      assert_equal([0.0, 1.0, 2.0, 3.0, 4.0, 5.0], h.range)
+      assert_equal([0.0] * 5, h.bin)
+    end
+    should 'bin() method return correct number of bins' do
+      h = Statsample::Histogram.alloc(4)
+      assert_equal(4, h.bins)
+    end
+    should 'increment correctly' do
+      h = Statsample::Histogram.alloc(5, [0, 5])
+      h.increment 2.5
+      assert_equal([0.0, 0.0, 1.0, 0.0, 0.0], h.bin)
+      h.increment [0.5, 0.5, 3.5, 3.5]
+      assert_equal([2.0, 0.0, 1.0, 2.0, 0.0], h.bin)
+      h.increment 0
+      assert_equal([3.0, 0.0, 1.0, 2.0, 0.0], h.bin)
+      h.increment 5
+      assert_equal([3.0, 0.0, 1.0, 2.0, 0.0], h.bin)
+    end
+    should 'alloc_uniform correctly with n, min,max' do
+      h = Statsample::Histogram.alloc_uniform(5, 0, 10)
+      assert_equal(5, h.bins)
+      assert_equal([0.0] * 5, h.bin)
+      assert_equal([0.0, 2.0, 4.0, 6.0, 8.0, 10.0], h.range)
+    end
+    should 'alloc_uniform correctly with n, [min,max]' do
+      h = Statsample::Histogram.alloc_uniform(5, [0, 10])
+      assert_equal(5, h.bins)
+      assert_equal([0.0] * 5, h.bin)
+      assert_equal([0.0, 2.0, 4.0, 6.0, 8.0, 10.0], h.range)
+    end
+    should 'get_range()' do
+      h = Statsample::Histogram.alloc_uniform(5, 2, 12)
+      5.times {|i|
+        assert_equal([2 + i * 2, 4 + i * 2], h.get_range(i))
+      }
+    end
+    should 'min() and max()' do
+      h = Statsample::Histogram.alloc_uniform(5, 2, 12)
+      assert_equal(2, h.min)
+      assert_equal(12, h.max)
+    end
+    should 'max_val()' do
+      h = Statsample::Histogram.alloc(5, [0, 5])
+      100.times { h.increment(rand * 5) }
+      max = h.bin[0]
+      (1..4).each {|i|
+        max = h.bin[i] if h.bin[i] > max
+      }
+      assert_equal(max, h.max_val)
+    end
+    should 'min_val()' do
+      h = Statsample::Histogram.alloc(5, [0, 5])
+      100.times { h.increment(rand * 5) }
+      min = h.bin[0]
+      (1..4).each {|i|
+        min = h.bin[i] if h.bin[i] < min
+      }
+      assert_equal(min, h.min_val)
+    end
+    should 'return correct estimated mean' do
+      a = Daru::Vector.new([1.5, 1.5, 1.5, 3.5, 3.5, 3.5])
+      h = Statsample::Histogram.alloc(5, [0, 5])
+      h.increment(a)
+      assert_equal(2.5, h.estimated_mean)
+    end
+    should 'return correct estimated standard deviation' do
+      a = Daru::Vector.new([0.5, 1.5, 1.5, 1.5, 2.5, 3.5, 3.5, 3.5, 4.5])
+      h = Statsample::Histogram.alloc(5, [0, 5])
+      h.increment(a)
+      assert_equal(a.sd, h.estimated_standard_deviation)
+    end
+    should 'return correct sum for all values' do
+      h = Statsample::Histogram.alloc(5, [0, 5])
+      n = rand(100)
+      n.times { h.increment(1) }
+      assert_equal(n, h.sum)
+    end
+    should 'return correct sum for a subset of values' do
+      h = Statsample::Histogram.alloc(5, [0, 5])
+      h.increment([0.5, 2.5, 4.5])
+      assert_equal(1, h.sum(0, 1))
+      assert_equal(2, h.sum(1, 4))
+    end
+    should 'not raise exception when all values equal' do
+      assert_nothing_raised do
+        a = Daru::Vector.new([5, 5, 5, 5, 5, 5])
+        h = Statsample::Graph::Histogram.new(a)
+        h.to_svg
+      end
+    end
+  end
+end

data/test/test_matrix.rb ADDED

@@ -0,0 +1,48 @@
+require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
+class StatsampleMatrixTestCase < Minitest::Test
+  def test_to_dataset
+    m = Matrix[[1, 4], [2, 5], [3, 6]]
+    m.extend Statsample::NamedMatrix
+    m.fields_y = [:x1, :x2]
+    m.name = 'test'
+    samples = 100
+    x1 =Daru::Vector.new([1, 2, 3])
+    x2 =Daru::Vector.new([4, 5, 6])
+    ds = Daru::DataFrame.new({ :x1 => x1, :x2 => x2 })
+    ds.rename 'test'
+    obs = m.to_dataframe
+    assert_equal(ds[:x1], obs[:x1])
+    assert_equal(ds[:x2], obs[:x2])
+    assert_equal(ds[:x1].mean, obs[:x1].mean)
+  end
+  def test_covariate
+    a = Matrix[[1.0, 0.3, 0.2], [0.3, 1.0, 0.5], [0.2, 0.5, 1.0]]
+    a.extend Statsample::CovariateMatrix
+    a.fields = %w(a b c)
+    assert_equal(:correlation, a._type)
+    assert_equal(Matrix[[0.5], [0.3]], a.submatrix(%w(c a), %w(b)))
+    assert_equal(Matrix[[1.0, 0.2], [0.2, 1.0]], a.submatrix(%w(c a)))
+    assert_equal(:correlation, a.submatrix(%w(c a))._type)
+    a = Matrix[[20, 30, 10], [30, 60, 50], [10, 50, 50]]
+    a.extend Statsample::CovariateMatrix
+    assert_equal(:covariance, a._type)
+    a = Daru::Vector.new(50.times.collect { rand })
+    b = Daru::Vector.new(50.times.collect { rand })
+    c = Daru::Vector.new(50.times.collect { rand })
+    ds = Daru::DataFrame.new({ :a => a, :b => b, :c => c })
+    corr = Statsample::Bivariate.correlation_matrix(ds)
+    real = Statsample::Bivariate.covariance_matrix(ds).correlation
+    corr.row_size.times do |i|
+      corr.column_size.times do |j|
+        assert_in_delta(corr[i, j], real[i, j], 1e-15)
+      end
+    end
+  end
+end

data/test/test_multiset.rb ADDED

@@ -0,0 +1,176 @@
+require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
+class StatsampleMultisetTestCase < Minitest::Test
+  def setup
+    @x = Daru::Vector.new(%w(a a a a b b b b))
+    @y = Daru::Vector.new([1, 2, 3, 4, 5, 6, 7, 8])
+    @z = Daru::Vector.new([10, 11, 12, 13, 14, 15, 16, 17])
+    @ds = Daru::DataFrame.new({ :x => @x, :y => @y, :z => @z })
+    @ms = @ds.to_multiset_by_split(:x)
+  end
+  def test_creation
+    v1a = Daru::Vector.new([1, 2, 3, 4, 5])
+    v2b = Daru::Vector.new([11, 21, 31, 41, 51])
+    v3c = Daru::Vector.new([21, 23, 34, 45, 56])
+    ds1 = Daru::DataFrame.new({ :v1 => v1a, :v2 => v2b, :v3 => v3c })
+    v1b = Daru::Vector.new([15, 25, 35, 45, 55])
+    v2b = Daru::Vector.new([11, 21, 31, 41, 51])
+    v3b = Daru::Vector.new([21, 23, 34, 45, 56])
+    ds2 = Daru::DataFrame.new({ :v1 => v1b, :v2 => v2b, :v3 => v3b })
+    ms = Statsample::Multiset.new([:v1, :v2, :v3])
+    ms.add_dataset(:ds1, ds1)
+    ms.add_dataset(:ds2, ds2)
+    assert_equal(ds1, ms[:ds1])
+    assert_equal(ds2, ms[:ds2])
+    assert_equal(v1a, ms[:ds1][:v1])
+    assert_not_equal(v1b, ms[:ds1][:v1])
+    ds3 = Daru::DataFrame.new({ :v1 => v1b, :v2 => v2b })
+    assert_raise ArgumentError do
+      ms.add_dataset(ds3)
+    end
+  end
+  def test_creation_empty
+    ms = Statsample::Multiset.new_empty_vectors([:id, :age, :name], [:male, :female])
+    ds_male   = Daru::DataFrame.new({
+      :id => Daru::Vector.new([]),
+      :age => Daru::Vector.new([]),
+      :name => Daru::Vector.new([])
+      }, order: [:id, :age, :name])
+    ds_female = Daru::DataFrame.new({
+      :id => Daru::Vector.new([]),
+      :age => Daru::Vector.new([]),
+      :name => Daru::Vector.new([])
+      }, order: [:id, :age, :name])
+    ms2 = Statsample::Multiset.new([:id, :age, :name])
+    ms2.add_dataset(:male, ds_male)
+    ms2.add_dataset(:female, ds_female)
+    assert_equal(ms2.fields, ms.fields)
+    assert_equal(ms2[:male], ms[:male])
+    assert_equal(ms2[:female], ms[:female])
+  end
+  def test_to_multiset_by_split_one
+    sex  = Daru::Vector.new(%w(m m m m m f f f f m))
+    city = Daru::Vector.new(%w(London Paris NY London Paris NY London Paris NY Tome))
+    age  = Daru::Vector.new([10, 10, 20, 30, 34, 34, 33, 35, 36, 40])
+    ds   = Daru::DataFrame.new({ :sex => sex, :city => city, :age => age })
+    ms = ds.to_multiset_by_split(:sex)
+    assert_equal(2, ms.n_datasets)
+    assert_equal(%w(f m), ms.datasets.keys.sort)
+    assert_equal(6, ms['m'].nrows)
+    assert_equal(4, ms['f'].nrows)
+    assert_equal(%w(London Paris NY London Paris Tome), ms['m'][:city].to_a)
+    assert_equal([34, 33, 35, 36], ms['f'][:age].to_a)
+  end
+  def test_to_multiset_by_split_multiple
+    sex = Daru::Vector.new(%w(m m m m m m m m m m f f f f f f f f f f))
+    city = Daru::Vector.new(%w(London London London Paris Paris London London London Paris Paris London London London Paris Paris London London London Paris Paris))
+    hair = Daru::Vector.new(%w(blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black))
+    age = Daru::Vector.new([10, 10, 20, 30, 34, 34, 33, 35, 36, 40, 10, 10, 20, 30, 34, 34, 33, 35, 36, 40])
+    ds = Daru::DataFrame.new({
+      :sex => sex, :city => city, :hair => hair, :age => age
+      }, order: [:sex, :city, :hair, :age])
+    ms = ds.to_multiset_by_split(:sex, :city, :hair)
+    assert_equal(8, ms.n_datasets)
+    assert_equal(3, ms[%w(m London blonde)].nrows)
+    assert_equal(3, ms[%w(m London blonde)].nrows)
+    assert_equal(1, ms[%w(m Paris black)].nrows)
+  end
+  def test_stratum_proportion
+    ds1 = Daru::DataFrame.new({ :q1 => Daru::Vector.new([1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0]) })
+    ds2 = Daru::DataFrame.new({ :q1 => Daru::Vector.new([1, 1, 1, 1, 1, 1, 1, 0, 0]) })
+    assert_equal(5.0 / 12, ds1[:q1].proportion)
+    assert_equal(7.0 / 9, ds2[:q1].proportion)
+    ms = Statsample::Multiset.new([:q1])
+    ms.add_dataset(:d1, ds1)
+    ms.add_dataset(:d2, ds2)
+    ss = Statsample::StratifiedSample.new(ms, :d1 => 50, :d2 => 100)
+    assert_in_delta(0.655, ss.proportion(:q1), 0.01)
+    assert_in_delta(0.345, ss.proportion(:q1, 0), 0.01)
+  end
+  def test_stratum_scale
+    boys = Daru::DataFrame.new({ :test => Daru::Vector.new([50, 55, 60, 62, 62, 65, 67, 67, 70, 70, 73, 73, 75, 78, 78, 80, 85, 90]) })
+    girls =Daru::DataFrame.new({ :test => Daru::Vector.new( [70, 70, 72, 72, 75, 75, 78, 78, 80, 80, 82, 82, 85, 85, 88, 88, 90, 90]) })
+    ms = Statsample::Multiset.new([:test])
+    ms.add_dataset(:boys, boys)
+    ms.add_dataset(:girls, girls)
+    ss = Statsample::StratifiedSample.new(ms, :boys => 10_000, :girls => 10_000)
+    assert_equal(2, ss.strata_number)
+    assert_equal(20_000, ss.population_size)
+    assert_equal(10_000, ss.stratum_size(:boys))
+    assert_equal(10_000, ss.stratum_size(:girls))
+    assert_equal(36, ss.sample_size)
+    assert_equal(75, ss.mean(:test))
+    assert_in_delta(1.45, ss.standard_error_wor(:test), 0.01)
+    assert_in_delta(ss.standard_error_wor(:test), ss.standard_error_wor_2(:test), 0.00001)
+  end
+  def test_each
+    xpe = {
+      'a' => Daru::Vector.new(%w(a a a a)),
+      'b' => Daru::Vector.new(%w(b b b b))
+    }
+    ype = {
+      'a' => Daru::Vector.new([1, 2, 3, 4]),
+      'b' => Daru::Vector.new([5, 6, 7, 8])
+    }
+    zpe = {
+      'a' => Daru::Vector.new([10, 11, 12, 13]),
+      'b' => Daru::Vector.new([14, 15, 16, 17])
+    }
+    xp, yp, zp = {}, {}, {}
+    @ms.each {|k, ds|
+      xp[k] = ds[:x]
+      yp[k] = ds[:y]
+      zp[k] = ds[:z]
+    }
+    assert_equal(xpe, xp)
+    assert_equal(ype, yp)
+    assert_equal(zpe, zp)
+  end
+  def test_multiset_union_with_block
+    r1 = rand
+    r2 = rand
+    ye = Daru::Vector.new([1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2])
+    ze = Daru::Vector.new([10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2])
+    ds2 = @ms.union {|k, ds|
+      ds[:y].recode!{|v|
+        k == 'a' ? v * r1 : v * r2
+      }
+      ds[:z].recode!{|v|
+        k == 'a' ? v * r1 : v * r2
+      }
+    }
+    assert_equal(ye, ds2[:y])
+    assert_equal(ze, ds2[:z])
+  end
+  def test_multiset_union
+    r1 = rand
+    r2 = rand
+    ye = Daru::Vector.new([1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2])
+    ze = Daru::Vector.new([10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2])
+    @ms.each do |k, ds|
+      ds[:y].recode! { |v|
+        k == 'a' ? v * r1 : v * r2
+      }
+      ds[:z].recode! {|v|
+        k == 'a' ? v * r1 : v * r2
+      }
+    end
+    ds2 = @ms.union
+    assert_equal(ye, ds2[:y])
+    assert_equal(ze, ds2[:z])
+  end
+end