RubyGems - statsample - Versions diffs - 1.5.0 → 2.0.0 - Mend

statsample 1.5.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (114)

checksums.yaml +4 -4
data/.build.sh +15 -0
data/.gitignore +1 -0
data/.travis.yml +19 -7
data/CONTRIBUTING.md +33 -0
data/History.txt +5 -0
data/README.md +41 -53
data/benchmarks/correlation_matrix_15_variables.rb +6 -5
data/benchmarks/correlation_matrix_5_variables.rb +6 -5
data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +23 -26
data/examples/boxplot.rb +17 -5
data/examples/correlation_matrix.rb +36 -7
data/examples/dataset.rb +25 -5
data/examples/dominance_analysis.rb +8 -7
data/examples/dominance_analysis_bootstrap.rb +16 -11
data/examples/histogram.rb +16 -2
data/examples/icc.rb +5 -6
data/examples/levene.rb +17 -3
data/examples/multiple_regression.rb +6 -3
data/examples/parallel_analysis.rb +11 -6
data/examples/polychoric.rb +26 -13
data/examples/principal_axis.rb +8 -4
data/examples/reliability.rb +10 -10
data/examples/scatterplot.rb +8 -0
data/examples/t_test.rb +7 -0
data/examples/u_test.rb +10 -2
data/examples/vector.rb +9 -6
data/examples/velicer_map_test.rb +12 -8
data/lib/statsample.rb +13 -47
data/lib/statsample/analysis/suite.rb +1 -1
data/lib/statsample/anova/oneway.rb +6 -6
data/lib/statsample/anova/twoway.rb +26 -24
data/lib/statsample/bivariate.rb +78 -61
data/lib/statsample/bivariate/pearson.rb +2 -2
data/lib/statsample/codification.rb +45 -32
data/lib/statsample/converter/csv.rb +15 -53
data/lib/statsample/converter/spss.rb +6 -5
data/lib/statsample/converters.rb +50 -211
data/lib/statsample/crosstab.rb +26 -25
data/lib/statsample/daru.rb +117 -0
data/lib/statsample/dataset.rb +70 -942
data/lib/statsample/dominanceanalysis.rb +16 -17
data/lib/statsample/dominanceanalysis/bootstrap.rb +26 -28
data/lib/statsample/factor/parallelanalysis.rb +17 -19
data/lib/statsample/factor/pca.rb +21 -20
data/lib/statsample/factor/principalaxis.rb +3 -3
data/lib/statsample/graph/boxplot.rb +8 -16
data/lib/statsample/graph/histogram.rb +4 -4
data/lib/statsample/graph/scatterplot.rb +8 -7
data/lib/statsample/histogram.rb +128 -119
data/lib/statsample/matrix.rb +20 -16
data/lib/statsample/multiset.rb +39 -38
data/lib/statsample/regression.rb +3 -3
data/lib/statsample/regression/multiple.rb +8 -10
data/lib/statsample/regression/multiple/alglibengine.rb +96 -89
data/lib/statsample/regression/multiple/baseengine.rb +32 -32
data/lib/statsample/regression/multiple/gslengine.rb +33 -36
data/lib/statsample/regression/multiple/matrixengine.rb +7 -9
data/lib/statsample/regression/multiple/rubyengine.rb +39 -41
data/lib/statsample/reliability.rb +23 -25
data/lib/statsample/reliability/icc.rb +8 -7
data/lib/statsample/reliability/multiscaleanalysis.rb +14 -12
data/lib/statsample/reliability/scaleanalysis.rb +58 -60
data/lib/statsample/reliability/skillscaleanalysis.rb +34 -29
data/lib/statsample/resample.rb +1 -1
data/lib/statsample/shorthand.rb +29 -25
data/lib/statsample/test/kolmogorovsmirnov.rb +5 -3
data/lib/statsample/test/levene.rb +28 -27
data/lib/statsample/test/t.rb +7 -9
data/lib/statsample/test/umannwhitney.rb +28 -28
data/lib/statsample/test/wilcoxonsignedrank.rb +45 -43
data/lib/statsample/vector.rb +70 -1013
data/lib/statsample/version.rb +1 -1
data/statsample.gemspec +12 -16
data/test/helpers_tests.rb +1 -1
data/test/test_analysis.rb +17 -17
data/test/test_anova_contrast.rb +6 -6
data/test/test_anovatwowaywithdataset.rb +8 -8
data/test/test_anovawithvectors.rb +8 -8
data/test/test_awesome_print_bug.rb +1 -1
data/test/test_bartlettsphericity.rb +4 -4
data/test/test_bivariate.rb +48 -43
data/test/test_codification.rb +33 -33
data/test/test_crosstab.rb +9 -9
data/test/test_dataset.rb +28 -458
data/test/test_factor.rb +46 -38
data/test/test_factor_pa.rb +22 -13
data/test/test_ggobi.rb +4 -4
data/test/test_gsl.rb +4 -4
data/test/test_histogram.rb +3 -3
data/test/test_matrix.rb +13 -13
data/test/test_multiset.rb +103 -91
data/test/test_regression.rb +57 -52
data/test/test_reliability.rb +55 -45
data/test/test_reliability_icc.rb +8 -8
data/test/test_reliability_skillscale.rb +26 -24
data/test/test_resample.rb +1 -1
data/test/test_statistics.rb +3 -13
data/test/test_stest.rb +9 -9
data/test/test_stratified.rb +3 -3
data/test/test_test_t.rb +12 -12
data/test/test_umannwhitney.rb +2 -2
data/test/test_vector.rb +76 -613
data/test/test_wilcoxonsignedrank.rb +4 -4
metadata +57 -28
data/lib/statsample/rserve_extension.rb +0 -20
data/lib/statsample/vector/gsl.rb +0 -106
data/test/fixtures/repeated_fields.csv +0 -7
data/test/fixtures/scientific_notation.csv +0 -4
data/test/fixtures/test_csv.csv +0 -7
data/test/fixtures/test_xls.xls +0 -0
data/test/test_csv.rb +0 -63
data/test/test_rserve_extension.rb +0 -42
data/test/test_xls.rb +0 -52

data/test/test_crosstab.rb CHANGED

@@ -1,8 +1,8 @@
 require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
 class StatsampleCrosstabTestCase < Minitest::Test
   def initialize(*args)
-    @v1 = %w(black blonde black black red black brown black blonde black red black blonde).to_vector
-    @v2 = %w(woman man man woman man man man woman man woman woman man man).to_vector
+    @v1 =Daru::Vector.new( %w(black blonde black black red black brown black blonde black red black blonde))
+    @v2 =Daru::Vector.new( %w(woman man man woman man man man woman man woman woman man man))
     @ct = Statsample::Crosstab.new(@v1, @v2)
     super
   end
@@ -12,7 +12,7 @@ class StatsampleCrosstabTestCase < Minitest::Test
     assert_raise ArgumentError do
       Statsample::Crosstab.new(e1, @v2)
     end
-    e2 = %w(black blonde black black red black brown black blonde black black).to_vector
+    e2 = Daru::Vector.new(%w(black blonde black black red black brown black blonde black black))
     assert_raise ArgumentError do
       Statsample::Crosstab.new(e2, @v2)
@@ -23,8 +23,8 @@ class StatsampleCrosstabTestCase < Minitest::Test
   end
   def test_crosstab_basic
-    assert_equal(%w(black blonde brown red), @ct.rows_names)
-    assert_equal(%w(man woman), @ct.cols_names)
+    assert_equal(Daru::Vector.new(%w(black blonde brown red)), @ct.rows_names)
+    assert_equal(Daru::Vector.new(%w(man woman)), @ct.cols_names)
     assert_equal({ 'black' => 7, 'blonde' => 3, 'red' => 2, 'brown' => 1 }, @ct.rows_total)
     assert_equal({ 'man' => 8, 'woman' => 5 }, @ct.cols_total)
   end
@@ -51,15 +51,15 @@ class StatsampleCrosstabTestCase < Minitest::Test
   end
   def test_expected
-    v1 = %w(1 1 1 1 1 0 0 0 0 0).to_vector
-    v2 = %w(0 0 0 0 0 1 1 1 1 1).to_vector
+    v1 = Daru::Vector.new(%w(1 1 1 1 1 0 0 0 0 0))
+    v2 = Daru::Vector.new(%w(0 0 0 0 0 1 1 1 1 1))
     ct = Statsample::Crosstab.new(v1, v2)
     assert_equal(Matrix[[2.5, 2.5], [2.5, 2.5]], ct.matrix_expected)
   end
   def test_crosstab_with_scale
-    v1 = %w(1 1 1 1 1 0 0 0 0 0).to_numeric
-    v2 = %w(0 0 0 0 0 1 1 1 1 1).to_numeric
+    v1 = Daru::Vector.new(%w(1 1 1 1 1 0 0 0 0 0))
+    v2 = Daru::Vector.new(%w(0 0 0 0 0 1 1 1 1 1))
     ct = Statsample::Crosstab.new(v1, v2)
     assert_equal(Matrix[[0, 5], [5, 0]], ct.to_matrix)
     assert_nothing_raised { ct.summary }

data/test/test_dataset.rb CHANGED

@@ -1,479 +1,49 @@
 require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
 class StatsampleDatasetTestCase < Minitest::Test
   def setup
-    @ds = Statsample::Dataset.new({ 'id' => Statsample::Vector.new([1, 2, 3, 4, 5]), 'name' => Statsample::Vector.new(%w(Alex Claude Peter Franz George)), 'age' => Statsample::Vector.new([20, 23, 25, 27, 5]),
-                                    'city' => Statsample::Vector.new(['New York', 'London', 'London', 'Paris', 'Tome']),
-                                    'a1' => Statsample::Vector.new(['a,b', 'b,c', 'a', nil, 'a,b,c']) }, %w(id name age city a1))
-  end
-  def test_nest
-    ds = {
-      'a' => %w(a a a b b b).to_vector,
-      'b' => %w(c c d d e e).to_vector,
-      'c' => %w(f g h i j k).to_vector
-    }.to_dataset
-    nest = ds.nest('a', 'b')
-    assert_equal([{ 'c' => 'f' }, { 'c' => 'g' }], nest['a']['c'])
-    assert_equal([{ 'c' => 'h' }], nest['a']['d'])
-    assert_equal([{ 'c' => 'j' }, { 'c' => 'k' }], nest['b']['e'])
-  end
-  def test_should_have_summary
-    assert(@ds.summary.size > 0)
+    assert_output(nil, "WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\nWARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\nWARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\nWARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\nWARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\nWARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\n") do
+      @ds = Statsample::Dataset.new({
+        'id' => Statsample::Vector.new([1, 2, 3, 4, 5]),
+        'name' => Statsample::Vector.new(%w(Alex Claude Peter Franz George)),
+        'age' => Statsample::Vector.new([20, 23, 25, 27, 5]),
+        'city' => Statsample::Vector.new(['New York', 'London', 'London', 'Paris', 'Tome']),
+        'a1' => Statsample::Vector.new(['a,b', 'b,c', 'a', nil, 'a,b,c']) },
+        %w(id name age city a1))
+    end
   end
   def test_basic
-    assert_equal(5, @ds.cases)
-    assert_equal(%w(id name age city a1), @ds.fields)
-  end
-  def test_saveload
-    outfile = Tempfile.new('dataset.ds')
-    @ds.save(outfile.path)
-    a = Statsample.load(outfile.path)
-    assert_equal(@ds, a)
-  end
-  def test_gsl
-    if Statsample.has_gsl?
-      matrix = GSL::Matrix[[1, 2], [3, 4], [5, 6]]
-      ds = Statsample::Dataset.new('v1' => [1, 3, 5].to_vector, 'v2' => [2, 4, 6].to_vector)
-      assert_equal(matrix, ds.to_gsl)
-    else
-      skip('Gsl needed')
+    assert_output(nil, "WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using Daru::DataFrame#nrows.\n") do
+      assert_equal(5, @ds.cases)
     end
-  end
-  def test_matrix
-    matrix = Matrix[[1, 2], [3, 4], [5, 6]]
-    ds = Statsample::Dataset.new('v1' => [1, 3, 5].to_vector, 'v2' => [2, 4, 6].to_vector)
-    assert_equal(matrix, ds.to_matrix)
+    assert_output(nil, "WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using Daru::DataFrame#vectors.\n") do
+      assert_equal([:id, :name, :age, :city, :a1], @ds.fields)
+    end
   end
   def test_fields
-    @ds.fields = %w(name a1 id age city)
-    assert_equal(%w(name a1 id age city), @ds.fields)
-    @ds.fields = %w(id name age)
-    assert_equal(%w(id name age a1 city), @ds.fields)
-  end
-  def test_merge
-    a = [1, 2, 3].to_numeric
-    b = [3, 4, 5].to_vector
-    c = [4, 5, 6].to_numeric
-    d = [7, 8, 9].to_vector
-    e = [10, 20, 30].to_vector
-    ds1 = { 'a' => a, 'b' => b }.to_dataset
-    ds2 = { 'c' => c, 'd' => d }.to_dataset
-    exp = { 'a' => a, 'b' => b, 'c' => c, 'd' => d }.to_dataset
-    assert_equal(exp, ds1.merge(ds2))
-    exp.fields = %w(c d a b)
-    assert_equal(exp, ds2.merge(ds1))
-    ds3 = { 'a' => e }.to_dataset
-    exp = { 'a_1' => a, 'b' => b, 'a_2' => e }.to_dataset
-    exp.fields = %w(a_1 b a_2)
-    assert_equal(exp, ds1.merge(ds3))
-  end
-  def test_each_vector
-    a = [1, 2, 3].to_vector
-    b = [3, 4, 5].to_vector
-    fields = %w(a b)
-    ds = Statsample::Dataset.new({ 'a' => a, 'b' => b }, fields)
-    res = []
-    ds.each_vector{|k, v|
-      res.push([k, v])
-    }
-    assert_equal([['a', a], ['b', b]], res)
-    ds.fields = %w(b a)
-    res = []
-    ds.each_vector{|k, v|
-      res.push([k, v])
-    }
-    assert_equal([['b', b], ['a', a]], res)
-  end
-  def test_equality
-    v1 = [1, 2, 3, 4].to_vector
-    v2 = [5, 6, 7, 8].to_vector
-    ds1 = Statsample::Dataset.new({ 'v1' => v1, 'v2' => v2 }, %w(v2 v1))
-    v3 = [1, 2, 3, 4].to_vector
-    v4 = [5, 6, 7, 8].to_vector
-    ds2 = Statsample::Dataset.new({ 'v1' => v3, 'v2' => v4 }, %w(v2 v1))
-    assert_equal(ds1, ds2)
-    ds2.fields = %w(v1 v2)
-    assert_not_equal(ds1, ds2)
-  end
-  def test_add_vector
-    v = Statsample::Vector.new(%w(a b c d e))
-    @ds.add_vector('new', v)
-    assert_equal(%w(id name age city a1 new), @ds.fields)
-    x = Statsample::Vector.new(%w(a b c d e f g))
-    assert_raise ArgumentError do
-      @ds.add_vector('new2', x)
+    assert_output(nil, "WARNING: Deprecated. Use Daru::DataFrame#reindex_vectors! instead.\nWARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using Daru::DataFrame#vectors.\n") do
+      @ds.fields = %w(name a1 id age city)
+      assert_equal([:name, :a1, :id, :age, :city], @ds.fields)
     end
-  end
-  def test_vector_by_calculation
-    a1 = [1, 2, 3, 4, 5, 6, 7].to_vector(:numeric)
-    a2 = [10, 20, 30, 40, 50, 60, 70].to_vector(:numeric)
-    a3 = [100, 200, 300, 400, 500, 600, 700].to_vector(:numeric)
-    ds = { 'a1' => a1, 'a2' => a2, 'a3' => a3 }.to_dataset
-    total = ds.vector_by_calculation {|row|
-      row['a1'] + row['a2'] + row['a3']
-    }
-    expected = [111, 222, 333, 444, 555, 666, 777].to_vector(:numeric)
-    assert_equal(expected, total)
-  end
-  def test_vector_sum
-    a1 = [1, 2, 3, 4, 5, nil].to_vector(:numeric)
-    a2 = [10, 10, 20, 20, 20, 30].to_vector(:numeric)
-    b1 = [nil, 1, 1, 1, 1, 2].to_vector(:numeric)
-    b2 = [2, 2, 2, nil, 2, 3].to_vector(:numeric)
-    ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2 }.to_dataset
-    total = ds.vector_sum
-    a = ds.vector_sum(%w(a1 a2))
-    b = ds.vector_sum(%w(b1 b2))
-    expected_a = [11, 12, 23, 24, 25, nil].to_vector(:numeric)
-    expected_b = [nil, 3, 3, nil, 3, 5].to_vector(:numeric)
-    expected_total = [nil, 15, 26, nil, 28, nil].to_vector(:numeric)
-    assert_equal(expected_a, a)
-    assert_equal(expected_b, b)
-    assert_equal(expected_total, total)
-  end
-  def test_vector_missing_values
-    a1 = [1, nil, 3, 4, 5, nil].to_vector(:numeric)
-    a2 = [10, nil, 20, 20, 20, 30].to_vector(:numeric)
-    b1 = [nil, nil, 1, 1, 1, 2].to_vector(:numeric)
-    b2 = [2, 2, 2, nil, 2, 3].to_vector(:numeric)
-    c = [nil, 2, 4, 2, 2, 2].to_vector(:numeric)
-    ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
-    mva = [2, 3, 0, 1, 0, 1].to_vector(:numeric)
-    assert_equal(mva, ds.vector_missing_values)
-  end
-  def test_has_missing_values
-    a1 = [1, nil, 3, 4, 5, nil].to_vector(:numeric)
-    a2 = [10, nil, 20, 20, 20, 30].to_vector(:numeric)
-    b1 = [nil, nil, 1, 1, 1, 2].to_vector(:numeric)
-    b2 = [2, 2, 2, nil, 2, 3].to_vector(:numeric)
-    c = [nil, 2, 4, 2, 2, 2].to_vector(:numeric)
-    ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
-    assert(ds.has_missing_data?)
-    clean = ds.dup_only_valid
-    assert(!clean.has_missing_data?)
-  end
-  def test_vector_count_characters
-    a1 = [1, 'abcde', 3, 4, 5, nil].to_vector(:numeric)
-    a2 = [10, 20.3, 20, 20, 20, 30].to_vector(:numeric)
-    b1 = [nil, '343434', 1, 1, 1, 2].to_vector(:numeric)
-    b2 = [2, 2, 2, nil, 2, 3].to_vector(:numeric)
-    c = [nil, 2, 'This is a nice example', 2, 2, 2].to_vector(:numeric)
-    ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
-    exp = [4, 17, 27, 5, 6, 5].to_vector(:numeric)
-    assert_equal(exp, ds.vector_count_characters)
-  end
-  def test_vector_mean
-    a1 = [1, 2, 3, 4, 5, nil].to_vector(:numeric)
-    a2 = [10, 10, 20, 20, 20, 30].to_vector(:numeric)
-    b1 = [nil, 1, 1, 1, 1, 2].to_vector(:numeric)
-    b2 = [2, 2, 2, nil, 2, 3].to_vector(:numeric)
-    c = [nil, 2, 4, 2, 2, 2].to_vector(:numeric)
-    ds = { 'a1' => a1, 'a2' => a2, 'b1' => b1, 'b2' => b2, 'c' => c }.to_dataset
-    total = ds.vector_mean
-    a = ds.vector_mean(%w(a1 a2), 1)
-    b = ds.vector_mean(%w(b1 b2), 1)
-    c = ds.vector_mean(%w(b1 b2 c), 1)
-    expected_a = [5.5, 6, 11.5, 12, 12.5, 30].to_vector(:numeric)
-    expected_b = [2, 1.5, 1.5, 1, 1.5, 2.5].to_vector(:numeric)
-    expected_c = [nil, 5.0 / 3, 7.0 / 3, 1.5, 5.0 / 3, 7.0 / 3].to_vector(:numeric)
-    expected_total = [nil, 3.4, 6, nil, 6.0, nil].to_vector(:numeric)
-    assert_equal(expected_a, a)
-    assert_equal(expected_b, b)
-    assert_equal(expected_c, c)
-    assert_equal(expected_total, total)
-  end
-  def test_each_array
-    expected = [[1, 'Alex', 20, 'New York', 'a,b'], [2, 'Claude', 23, 'London', 'b,c'], [3, 'Peter', 25, 'London', 'a'], [4, 'Franz', 27, 'Paris', nil], [5, 'George', 5, 'Tome', 'a,b,c']]
-    out = []
-    @ds.each_array{ |a|
-      out.push(a)
-    }
-    assert_equal(expected, out)
-  end
-  def test_recode
-    @ds['age'].type = :numeric
-    @ds.recode!('age') { |c| c['id'] * 2 }
-    expected = [2, 4, 6, 8, 10].to_vector(:numeric)
-    assert_equal(expected, @ds['age'])
-  end
-  def test_case_as
-    assert_equal({ 'id' => 1, 'name' => 'Alex', 'city' => 'New York', 'age' => 20, 'a1' => 'a,b' }, @ds.case_as_hash(0))
-    assert_equal([5, 'George', 5, 'Tome', 'a,b,c'], @ds.case_as_array(4))
-    # Native methods
-    assert_equal({ 'id' => 1, 'name' => 'Alex', 'city' => 'New York', 'age' => 20, 'a1' => 'a,b' }, @ds._case_as_hash(0))
-    assert_equal([5, 'George', 5, 'Tome', 'a,b,c'], @ds._case_as_array(4))
-  end
-  def test_delete_vector
-    @ds.delete_vector('name')
-    assert_equal(%w(id age city a1), @ds.fields)
-    assert_equal(%w(a1 age city id), @ds.vectors.keys.sort)
-  end
-  def test_change_type
-    @ds.col('age').type = :numeric
-    assert_equal(:numeric, @ds.col('age').type)
-  end
-  def test_split_by_separator_recode
-    @ds.add_vectors_by_split_recode('a1', '_')
-    assert_equal(%w(id name age city a1 a1_1 a1_2 a1_3), @ds.fields)
-    assert_equal([1, 0, 1, nil, 1], @ds.col('a1_1').to_a)
-    assert_equal([1, 1, 0, nil, 1], @ds.col('a1_2').to_a)
-    assert_equal([0, 1, 0, nil, 1], @ds.col('a1_3').to_a)
-    { 'a1_1' => 'a1:a', 'a1_2' => 'a1:b', 'a1_3' => 'a1:c' }.each do |k, v|
-      assert_equal(v, @ds[k].name)
+    assert_raise ArgumentError, "Assigning less fields than vectors is no longer supported" do
+      @ds.fields = %w(id name age)
     end
   end
-  def test_split_by_separator
-    @ds.add_vectors_by_split('a1', '_')
-    assert_equal(%w(id name age city a1 a1_a a1_b a1_c), @ds.fields)
-    assert_equal([1, 0, 1, nil, 1], @ds.col('a1_a').to_a)
-    assert_equal([1, 1, 0, nil, 1], @ds.col('a1_b').to_a)
-    assert_equal([0, 1, 0, nil, 1], @ds.col('a1_c').to_a)
-  end
-  def test_percentiles
-    v1 = (1..100).to_a.to_numeric
-    assert_equal(50.5, v1.median)
-    assert_equal(25.5, v1.percentil(25))
-    v2 = (1..99).to_a.to_numeric
-    assert_equal(50, v2.median)
-    assert_equal(25, v2.percentil(25))
-    v3 = (1..50).to_a.to_numeric
-    assert_equal(25.5, v3.median)
-    assert_equal(13, v3.percentil(25))
-  end
-  def test_add_case
-    ds = Statsample::Dataset.new('a' => [].to_vector, 'b' => [].to_vector, 'c' => [].to_vector)
-    ds.add_case([1, 2, 3])
-    ds.add_case('a' => 4, 'b' => 5, 'c' => 6)
-    ds.add_case([[7, 8, 9], %w(a b c)])
-    assert_equal({ 'a' => 1, 'b' => 2, 'c' => 3 }, ds.case_as_hash(0))
-    assert_equal([4, 5, 6], ds.case_as_array(1))
-    assert_equal([7, 8, 9], ds.case_as_array(2))
-    assert_equal(%w(a b c), ds.case_as_array(3))
-    ds.add_case_array([6, 7, 1])
-    ds.update_valid_data
-    assert_equal([6, 7, 1], ds.case_as_array(4))
-  end
-  def test_marshaling
-    ds_marshal = Marshal.load(Marshal.dump(@ds))
-    assert_equal(ds_marshal, @ds)
-  end
-  def test_range
-    v1 = [1, 2, 3, 4].to_vector
-    v2 = [5, 6, 7, 8].to_vector
-    v3 = [9, 10, 11, 12].to_vector
-    ds1 = Statsample::Dataset.new({ 'v1' => v1, 'v2' => v2, 'v3' => v3 }, %w(v3 v2 v1))
-    assert_same(v1, ds1['v1'])
-    ds2 = ds1['v2'..'v1']
-    assert_equal(%w(v2 v1), ds2.fields)
-    assert_same(ds1['v1'], ds2['v1'])
-    assert_same(ds1['v2'], ds2['v2'])
-  end
-  def test_clone
-    v1 = [1, 2, 3, 4].to_vector
-    v2 = [5, 6, 7, 8].to_vector
-    ds1 = Statsample::Dataset.new({ 'v1' => v1, 'v2' => v2 }, %w(v2 v1))
-    ds2 = ds1.clone
-    assert_equal(ds1, ds2)
-    assert_not_same(ds1, ds2)
-    assert_equal(ds1['v1'], ds2['v1'])
-    assert_same(ds1['v1'], ds2['v1'])
-    assert_equal(ds1.fields, ds2.fields)
-    assert_not_same(ds1.fields, ds2.fields)
-    assert_equal(ds1.cases, ds2.cases)
-    # partial clone
-    ds3 = ds1.clone('v1')
-    ds_exp = Statsample::Dataset.new({ 'v1' => v1 }, %w(v1))
-    assert_equal(ds_exp, ds3)
-    assert_not_same(ds_exp, ds3)
-    assert_equal(ds3['v1'], ds_exp['v1'])
-    assert_same(ds3['v1'], ds_exp['v1'])
-    assert_equal(ds3.fields, ds_exp.fields)
-    assert_equal(ds3.cases, ds_exp.cases)
-    assert_not_same(ds3.fields, ds_exp.fields)
-  end
-  def test_dup
-    v1 = [1, 2, 3, 4].to_vector
-    v2 = [5, 6, 7, 8].to_vector
-    ds1 = Statsample::Dataset.new({ 'v1' => v1, 'v2' => v2 }, %w(v2 v1))
-    ds2 = ds1.dup
-    assert_equal(ds1, ds2)
-    assert_not_same(ds1, ds2)
-    assert_equal(ds1['v1'], ds2['v1'])
-    assert_not_same(ds1['v1'], ds2['v1'])
-    assert_equal(ds1.cases, ds2.cases)
-    assert_equal(ds1.fields, ds2.fields)
-    assert_not_same(ds1.fields, ds2.fields)
-    ds1['v1'].type = :numeric
-    # dup partial
-    ds3 = ds1.dup('v1')
-    ds_exp = Statsample::Dataset.new({ 'v1' => v1 }, %w(v1))
-    assert_equal(ds_exp, ds3)
-    assert_not_same(ds_exp, ds3)
-    assert_equal(ds3['v1'], ds_exp['v1'])
-    assert_not_same(ds3['v1'], ds_exp['v1'])
-    assert_equal(ds3.fields, ds_exp.fields)
-    assert_equal(ds3.cases, ds_exp.cases)
-    assert_not_same(ds3.fields, ds_exp.fields)
-    # empty
-    ds3 = ds1.dup_empty
-    assert_not_equal(ds1, ds3)
-    assert_not_equal(ds1['v1'], ds3['v1'])
-    assert_equal([], ds3['v1'].data)
-    assert_equal([], ds3['v2'].data)
-    assert_equal(:numeric, ds3['v1'].type)
-    assert_equal(ds1.fields, ds2.fields)
-    assert_not_same(ds1.fields, ds2.fields)
-  end
+  def test_crosstab_with_asignation
+    v1 = Daru::Vector.new(%w(a a a b b b c c c))
+    v2 = Daru::Vector.new(%w(a b c a b c a b c))
+    v3 = Daru::Vector.new(%w(0 1 0 0 1 1 0 0 1))
-  def test_from_to
-    assert_equal(%w(name age city), @ds.from_to('name', 'city'))
-    assert_raise ArgumentError do
-      @ds.from_to('name', 'a2')
+    assert_output(nil, "WARNING: Statsample::Dataset and Statsample::Vector have been deprecated in favor of Daru::DataFrame and Daru::Vector. Please switch to using that.\n") do
+      @ds = Statsample::Dataset.crosstab_by_assignation(v1, v2, v3)
     end
-  end
-  def test_each_array_with_nils
-    v1 = [1, -99, 3, 4, 'na'].to_vector(:numeric, missing_values: [-99, 'na'])
-    v2 = [5, 6, -99, 8, 20].to_vector(:numeric, missing_values: [-99])
-    v3 = [9, 10, 11, 12, 20].to_vector(:numeric, missing_values: [-99])
-    ds1 = Statsample::Dataset.new('v1' => v1, 'v2' => v2, 'v3' => v3)
-    ds2 = ds1.dup_empty
-    ds1.each_array_with_nils {|row|
-      ds2.add_case_array(row)
-    }
-    ds2.update_valid_data
-    assert_equal([1, nil, 3, 4, nil], ds2['v1'].data)
-    assert_equal([5, 6, nil, 8, 20], ds2['v2'].data)
-  end
-  def test_dup_only_valid
-    v1 = [1, nil, 3, 4].to_vector(:numeric)
-    v2 = [5, 6, nil, 8].to_vector(:numeric)
-    v3 = [9, 10, 11, 12].to_vector(:numeric)
-    ds1 = Statsample::Dataset.new('v1' => v1, 'v2' => v2, 'v3' => v3)
-    ds2 = ds1.dup_only_valid
-    expected = Statsample::Dataset.new('v1' => [1, 4].to_vector(:numeric), 'v2' => [5, 8].to_vector(:numeric), 'v3' => [9, 12].to_vector(:numeric))
-    assert_equal(expected, ds2)
-    assert_equal(expected.vectors.values, Statsample.only_valid(v1, v2, v3))
-    expected_partial = Statsample::Dataset.new('v1' => [1, 3, 4].to_vector(:numeric), 'v3' => [9, 11, 12].to_vector(:numeric))
-    assert_equal(expected_partial, ds1.dup_only_valid(%w(v1 v3)))
-  end
-  def test_filter
-    @ds['age'].type = :numeric
-    filtered = @ds.filter { |c| c['id'] == 2 or c['id'] == 4 }
-    expected = Statsample::Dataset.new({ 'id' => Statsample::Vector.new([2, 4]), 'name' => Statsample::Vector.new(%w(Claude Franz)), 'age' => Statsample::Vector.new([23, 27], :numeric),
-                                         'city' => Statsample::Vector.new(%w(London Paris)),
-                                         'a1' => Statsample::Vector.new(['b,c', nil]) }, %w(id name age city a1))
-    assert_equal(expected, filtered)
-  end
-  def test_filter_field
-    @ds['age'].type = :numeric
-    filtered = @ds.filter_field('id') { |c| c['id'] == 2 or c['id'] == 4 }
-    expected = [2, 4].to_vector
-    assert_equal(expected, filtered)
-  end
-  def test_verify
-    name = %w(r1 r2 r3 r4).to_vector(:object)
-    v1 = [1, 2, 3, 4].to_vector(:numeric)
-    v2 = [4, 3, 2, 1].to_vector(:numeric)
-    v3 = [10, 20, 30, 40].to_vector(:numeric)
-    v4 = %w(a b a b).to_vector(:object)
-    ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4, 'id' => name }.to_dataset
-    ds.fields = %w(v1 v2 v3 v4 id)
-    # Correct
-    t1 = create_test('If v4=a, v1 odd') { |r| r['v4'] == 'b' or (r['v4'] == 'a' and r['v1'].odd?) }
-    t2 = create_test('v3=v1*10')  { |r| r['v3'] == r['v1'] * 10 }
-    # Fail!
-    t3 = create_test("v4='b'") { |r| r['v4'] == 'b' }
-    exp1 = ["1 [1]: v4='b'", "3 [3]: v4='b'"]
-    exp2 = ["1 [r1]: v4='b'", "3 [r3]: v4='b'"]
-    res = ds.verify(t3, t1, t2)
-    assert_equal(exp1, res)
-    res = ds.verify('id', t1, t2, t3)
-    assert_equal(exp2, res)
-  end
-  def test_compute_operation
-    v1 = [1, 2, 3, 4].to_vector(:numeric)
-    v2 = [4, 3, 2, 1].to_vector(:numeric)
-    v3 = [10, 20, 30, 40].to_vector(:numeric)
-    vnumeric = [1.quo(2), 1, 3.quo(2), 2].to_vector(:numeric)
-    vsum = [1 + 4 + 10.0, 2 + 3 + 20.0, 3 + 2 + 30.0, 4 + 1 + 40.0].to_vector(:numeric)
-    vmult = [1 * 4, 2 * 3, 3 * 2, 4 * 1].to_vector(:numeric)
-    ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3 }.to_dataset
-    assert_equal(vnumeric, ds.compute('v1/2'))
-    assert_equal(vsum, ds.compute('v1+v2+v3'))
-    assert_equal(vmult, ds.compute('v1*v2'))
-  end
-  def test_crosstab_with_asignation
-    v1 = %w(a a a b b b c c c).to_vector
-    v2 = %w(a b c a b c a b c).to_vector
-    v3 = %w(0 1 0 0 1 1 0 0 1).to_numeric
-    ds = Statsample::Dataset.crosstab_by_asignation(v1, v2, v3)
-    assert_equal(:object, ds['_id'].type)
-    assert_equal(:numeric, ds['a'].type)
-    assert_equal(:numeric, ds['b'].type)
-    ev_id = %w(a b c).to_vector
-    ev_a = %w(0 0 0).to_numeric
-    ev_b = %w(1 1 0).to_numeric
-    ev_c = %w(0 1 1).to_numeric
-    ds2 = { '_id' => ev_id, 'a' => ev_a, 'b' => ev_b, 'c' => ev_c }.to_dataset
-    assert_equal(ds, ds2)
-  end
-  def test_one_to_many
-    cases = [
-      ['1', 'george', 'red', 10, 'blue', 20, nil, nil],
-      ['2', 'fred', 'green', 15, 'orange', 30, 'white', 20],
-      ['3', 'alfred', nil, nil, nil, nil, nil, nil]
-    ]
-    ds = Statsample::Dataset.new(%w(id name car_color1 car_value1 car_color2 car_value2 car_color3 car_value3))
-    cases.each { |c| ds.add_case_array c }
-    ds.update_valid_data
-    ids = %w(1 1 2 2 2).to_vector
-    colors = %w(red blue green orange white).to_vector
-    values = [10, 20, 15, 30, 20].to_vector
-    col_ids = [1, 2, 1, 2, 3].to_numeric
-    ds_expected = { 'id' => ids, '_col_id' => col_ids, 'color' => colors, 'value' => values }.to_dataset(%w(id _col_id color value))
-    assert_equal(ds_expected, ds.one_to_many(%w(id), 'car_%v%n'))
+    assert_output(nil, "WARNING: Daru uses symbols instead of strings for naming vectors. Please switch to symbols.\n") do
+      assert_equal(:object, @ds['_id'].type)
+    end
   end
 end