RubyGems - statsample - Versions diffs - 1.5.0 → 2.0.0 - Mend

statsample 1.5.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (114) hide show

checksums.yaml +4 -4
data/.build.sh +15 -0
data/.gitignore +1 -0
data/.travis.yml +19 -7
data/CONTRIBUTING.md +33 -0
data/History.txt +5 -0
data/README.md +41 -53
data/benchmarks/correlation_matrix_15_variables.rb +6 -5
data/benchmarks/correlation_matrix_5_variables.rb +6 -5
data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +23 -26
data/examples/boxplot.rb +17 -5
data/examples/correlation_matrix.rb +36 -7
data/examples/dataset.rb +25 -5
data/examples/dominance_analysis.rb +8 -7
data/examples/dominance_analysis_bootstrap.rb +16 -11
data/examples/histogram.rb +16 -2
data/examples/icc.rb +5 -6
data/examples/levene.rb +17 -3
data/examples/multiple_regression.rb +6 -3
data/examples/parallel_analysis.rb +11 -6
data/examples/polychoric.rb +26 -13
data/examples/principal_axis.rb +8 -4
data/examples/reliability.rb +10 -10
data/examples/scatterplot.rb +8 -0
data/examples/t_test.rb +7 -0
data/examples/u_test.rb +10 -2
data/examples/vector.rb +9 -6
data/examples/velicer_map_test.rb +12 -8
data/lib/statsample.rb +13 -47
data/lib/statsample/analysis/suite.rb +1 -1
data/lib/statsample/anova/oneway.rb +6 -6
data/lib/statsample/anova/twoway.rb +26 -24
data/lib/statsample/bivariate.rb +78 -61
data/lib/statsample/bivariate/pearson.rb +2 -2
data/lib/statsample/codification.rb +45 -32
data/lib/statsample/converter/csv.rb +15 -53
data/lib/statsample/converter/spss.rb +6 -5
data/lib/statsample/converters.rb +50 -211
data/lib/statsample/crosstab.rb +26 -25
data/lib/statsample/daru.rb +117 -0
data/lib/statsample/dataset.rb +70 -942
data/lib/statsample/dominanceanalysis.rb +16 -17
data/lib/statsample/dominanceanalysis/bootstrap.rb +26 -28
data/lib/statsample/factor/parallelanalysis.rb +17 -19
data/lib/statsample/factor/pca.rb +21 -20
data/lib/statsample/factor/principalaxis.rb +3 -3
data/lib/statsample/graph/boxplot.rb +8 -16
data/lib/statsample/graph/histogram.rb +4 -4
data/lib/statsample/graph/scatterplot.rb +8 -7
data/lib/statsample/histogram.rb +128 -119
data/lib/statsample/matrix.rb +20 -16
data/lib/statsample/multiset.rb +39 -38
data/lib/statsample/regression.rb +3 -3
data/lib/statsample/regression/multiple.rb +8 -10
data/lib/statsample/regression/multiple/alglibengine.rb +96 -89
data/lib/statsample/regression/multiple/baseengine.rb +32 -32
data/lib/statsample/regression/multiple/gslengine.rb +33 -36
data/lib/statsample/regression/multiple/matrixengine.rb +7 -9
data/lib/statsample/regression/multiple/rubyengine.rb +39 -41
data/lib/statsample/reliability.rb +23 -25
data/lib/statsample/reliability/icc.rb +8 -7
data/lib/statsample/reliability/multiscaleanalysis.rb +14 -12
data/lib/statsample/reliability/scaleanalysis.rb +58 -60
data/lib/statsample/reliability/skillscaleanalysis.rb +34 -29
data/lib/statsample/resample.rb +1 -1
data/lib/statsample/shorthand.rb +29 -25
data/lib/statsample/test/kolmogorovsmirnov.rb +5 -3
data/lib/statsample/test/levene.rb +28 -27
data/lib/statsample/test/t.rb +7 -9
data/lib/statsample/test/umannwhitney.rb +28 -28
data/lib/statsample/test/wilcoxonsignedrank.rb +45 -43
data/lib/statsample/vector.rb +70 -1013
data/lib/statsample/version.rb +1 -1
data/statsample.gemspec +12 -16
data/test/helpers_tests.rb +1 -1
data/test/test_analysis.rb +17 -17
data/test/test_anova_contrast.rb +6 -6
data/test/test_anovatwowaywithdataset.rb +8 -8
data/test/test_anovawithvectors.rb +8 -8
data/test/test_awesome_print_bug.rb +1 -1
data/test/test_bartlettsphericity.rb +4 -4
data/test/test_bivariate.rb +48 -43
data/test/test_codification.rb +33 -33
data/test/test_crosstab.rb +9 -9
data/test/test_dataset.rb +28 -458
data/test/test_factor.rb +46 -38
data/test/test_factor_pa.rb +22 -13
data/test/test_ggobi.rb +4 -4
data/test/test_gsl.rb +4 -4
data/test/test_histogram.rb +3 -3
data/test/test_matrix.rb +13 -13
data/test/test_multiset.rb +103 -91
data/test/test_regression.rb +57 -52
data/test/test_reliability.rb +55 -45
data/test/test_reliability_icc.rb +8 -8
data/test/test_reliability_skillscale.rb +26 -24
data/test/test_resample.rb +1 -1
data/test/test_statistics.rb +3 -13
data/test/test_stest.rb +9 -9
data/test/test_stratified.rb +3 -3
data/test/test_test_t.rb +12 -12
data/test/test_umannwhitney.rb +2 -2
data/test/test_vector.rb +76 -613
data/test/test_wilcoxonsignedrank.rb +4 -4
metadata +57 -28
data/lib/statsample/rserve_extension.rb +0 -20
data/lib/statsample/vector/gsl.rb +0 -106
data/test/fixtures/repeated_fields.csv +0 -7
data/test/fixtures/scientific_notation.csv +0 -4
data/test/fixtures/test_csv.csv +0 -7
data/test/fixtures/test_xls.xls +0 -0
data/test/test_csv.rb +0 -63
data/test/test_rserve_extension.rb +0 -42
data/test/test_xls.rb +0 -52

data/lib/statsample/version.rb CHANGED

@@ -1,3 +1,3 @@
 module Statsample
-  VERSION = '1.5.0'
+  VERSION = '2.0.0'
 end

data/statsample.gemspec CHANGED

@@ -3,7 +3,7 @@ $:.unshift File.expand_path("../lib/", __FILE__)
 require 'statsample/version'
 require 'date'
-DESCRIPTION = <<MSG
+Statsample::DESCRIPTION = <<MSG
 A suite for basic and advanced statistics on Ruby. Tested on CRuby 1.9.3, 2.0.0
 and 2.1.1. See `.travis.yml` for more information.
@@ -11,7 +11,6 @@ Include:
 - Descriptive statistics: frequencies, median, mean,
 standard error, skew, kurtosis (and many others).
-- Imports and exports datasets from and to Excel, CSV and plain text files.
 - Correlations: Pearson's r, Spearman's rank correlation (rho), point biserial,
 tau a, tau b and  gamma. Tetrachoric and Polychoric correlation provides by
 statsample-bivariate-extension gem.
@@ -32,17 +31,11 @@ scales using factor analysis and correlations, if you want it.
 - Graphics: Histogram, Boxplot and Scatterplot.
 MSG
-POSTINSTALL = <<MSG
+Statsample::POSTINSTALL = <<MSG
 ***************************************************
 Thanks for installing statsample.
-On *nix, you could install statsample-optimization
-to retrieve gems gsl, statistics2 and a C extension
-to speed some methods.
-$ [sudo] gem install statsample-optimization
 *****************************************************
 MSG
@@ -56,8 +49,8 @@ Gem::Specification.new do |s|
   s.email = ["clbustos@gmail.com", "carlos@onox.com.br"]
   s.summary = "A suite for basic and advanced statistics on Ruby"
-  s.description = DESCRIPTION
-  s.post_install_message = POSTINSTALL
+  s.description = Statsample::DESCRIPTION
+  s.post_install_message = Statsample::POSTINSTALL
   s.rdoc_options = ["--main", "README.md"]
   s.extra_rdoc_files = ["History.txt", "LICENSE.txt", "README.md", "references.txt"]
@@ -67,22 +60,25 @@ Gem::Specification.new do |s|
   s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
   s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
-  s.add_runtime_dependency 'spreadsheet', '~> 0.6.5'
+  s.add_runtime_dependency 'daru', '~> 0.1'
+  s.add_runtime_dependency 'spreadsheet', '~> 1.0.3'
   s.add_runtime_dependency 'reportbuilder', '~> 1.4'
   s.add_runtime_dependency 'minimization', '~> 0.2'
   s.add_runtime_dependency 'dirty-memoize', '~> 0.0.4'
-  s.add_runtime_dependency 'extendmatrix', '~> 0.3'
+  s.add_runtime_dependency 'extendmatrix', '~> 0.4'
   s.add_runtime_dependency 'rserve-client', '~> 0.3'
-  s.add_runtime_dependency 'rubyvis', '~> 0.5.0'
+  s.add_runtime_dependency 'rubyvis', '~> 0.6.1'
   s.add_runtime_dependency 'distribution', '~> 0.7'
   s.add_runtime_dependency 'awesome_print', '~> 1.6'
-  s.add_development_dependency 'bundler', '~> 1.7'
+  s.add_development_dependency 'bundler', '~> 1.10'
   s.add_development_dependency 'rake', '~> 10.4'
   s.add_development_dependency 'rdoc', '~> 4.2'
   s.add_development_dependency 'shoulda', '~> 3.5'
   s.add_development_dependency 'shoulda-matchers', '~> 2.2'
-  s.add_development_dependency 'minitest', '~> 5.5'
+  s.add_development_dependency 'minitest', '~> 5.7'
   s.add_development_dependency 'gettext', '~> 3.1'
   s.add_development_dependency 'mocha', '~> 1.1'
+  s.add_development_dependency 'nmatrix', '~> 0.1.0'
+  s.add_development_dependency 'gsl-nmatrix', '~> 1.17.0'
 end

data/test/helpers_tests.rb CHANGED

@@ -33,7 +33,7 @@ module Minitest
     def assert_similar_vector(exp, obs, delta = 1e-10, msg = nil)
       msg ||= "Different vectors #{exp} - #{obs}"
       assert_equal(exp.size, obs.size)
-      exp.data_with_nils.each_with_index {|v, i|
+      exp.to_a.each_with_index {|v, i|
         assert_in_delta(v, obs[i], delta)
       }
     end

data/test/test_analysis.rb CHANGED

@@ -39,7 +39,7 @@ class StatsampleAnalysisTestCase < Minitest::Test
     should 'to_text returns the same as a normal ReportBuilder object' do
       rb = ReportBuilder.new(name: :test)
       section = ReportBuilder::Section.new(name: 'first')
-      a = [1, 2, 3].to_numeric
+      a = Daru::Vector.new([1, 2, 3])
       section.add('first')
       section.add(a)
       rb.add(section)
@@ -98,8 +98,8 @@ class StatsampleAnalysisTestCase < Minitest::Test
       end
       should 'attach() allows to call objects on objects which respond to fields' do
         an = Statsample::Analysis::Suite.new(:summary)
-        ds = { 'x' => stub(mean: 10), 'y' => stub(mean: 12) }
-        ds.expects(:fields).returns(%w(x y)).at_least_once
+        ds = { :x => stub(mean: 10), :y => stub(mean: 12) }
+        ds.expects(:vectors).returns([:x, :y]).at_least_once
         an.attach(ds)
         assert_equal(10, an.x.mean)
         assert_equal(12, an.y.mean)
@@ -109,10 +109,10 @@ class StatsampleAnalysisTestCase < Minitest::Test
       end
       should 'attached objects should be called LIFO' do
         an = Statsample::Analysis::Suite.new(:summary)
-        ds1 = { 'x' => stub(mean: 100), 'y' => stub(mean: 120), 'z' => stub(mean: 13) }
-        ds1.expects(:fields).returns(%w(x y z)).at_least_once
-        ds2 = { 'x' => stub(mean: 10), 'y' => stub(mean: 12) }
-        ds2.expects(:fields).returns(%w(x y)).at_least_once
+        ds1 = { :x => stub(mean: 100), :y => stub(mean: 120), :z => stub(mean: 13) }
+        ds1.expects(:vectors).returns([:x, :y, :z]).at_least_once
+        ds2 = { :x => stub(mean: 10), :y => stub(mean: 12) }
+        ds2.expects(:vectors).returns([:x, :y]).at_least_once
         an.attach(ds1)
         an.attach(ds2)
         assert_equal(10, an.x.mean)
@@ -122,10 +122,10 @@ class StatsampleAnalysisTestCase < Minitest::Test
       should 'detach() without arguments drop latest object' do
         an = Statsample::Analysis::Suite.new(:summary)
-        ds1 = { 'x' => stub(mean: 100), 'y' => stub(mean: 120), 'z' => stub(mean: 13) }
-        ds1.expects(:fields).returns(%w(x y z)).at_least_once
-        ds2 = { 'x' => stub(mean: 10), 'y' => stub(mean: 12) }
-        ds2.expects(:fields).returns(%w(x y)).at_least_once
+        ds1 = { :x => stub(mean: 100), :y => stub(mean: 120), :z => stub(mean: 13) }
+        ds1.expects(:vectors).returns([:x, :y, :z]).at_least_once
+        ds2 = { :x => stub(mean: 10), :y => stub(mean: 12) }
+        ds2.expects(:vectors).returns([:x, :y]).at_least_once
         an.attach(ds1)
         an.attach(ds2)
         assert_equal(10, an.x.mean)
@@ -134,12 +134,12 @@ class StatsampleAnalysisTestCase < Minitest::Test
       end
       should 'detach() with argument drop select object' do
         an = Statsample::Analysis::Suite.new(:summary)
-        ds1 = { 'x' => 1 }
-        ds1.expects(:fields).returns(%w(x)).at_least_once
-        ds2 = { 'x' => 2, 'y' => 3 }
-        ds2.expects(:fields).returns(%w(x y)).at_least_once
-        ds3 = { 'y' => 4 }
-        ds3.expects(:fields).returns(%w(y)).at_least_once
+        ds1 = { :x => 1 }
+        ds1.expects(:vectors).returns([:x]).at_least_once
+        ds2 = { :x => 2, :y => 3 }
+        ds2.expects(:vectors).returns([:x, :y]).at_least_once
+        ds3 = { :y => 4 }
+        ds3.expects(:vectors).returns([:y]).at_least_once
         an.attach(ds3)
         an.attach(ds2)

data/test/test_anova_contrast.rb CHANGED

@@ -2,12 +2,12 @@ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
 class StatsampleAnovaContrastTestCase < Minitest::Test
   context(Statsample::Anova::Contrast) do
     setup do
-      constant = [12, 13, 11, 12, 12].to_numeric
-      frequent = [9, 10, 9, 13, 14].to_numeric
-      infrequent = [15, 16, 17, 16, 16].to_numeric
-      never = [17, 18, 12, 18, 20].to_numeric
-      @vectors = [constant, frequent, infrequent, never]
-      @c = Statsample::Anova::Contrast.new(vectors: @vectors)
+      constant   = Daru::Vector.new([12, 13, 11, 12, 12])
+      frequent   = Daru::Vector.new([9, 10, 9, 13, 14])
+      infrequent = Daru::Vector.new([15, 16, 17, 16, 16])
+      never      = Daru::Vector.new([17, 18, 12, 18, 20])
+      @vectors   = [constant, frequent, infrequent, never]
+      @c         = Statsample::Anova::Contrast.new(vectors: @vectors)
     end
     should 'return correct value using c' do
       @c.c([1, -1.quo(3), -1.quo(3), -1.quo(3)])

data/test/test_anovatwowaywithdataset.rb CHANGED

@@ -4,14 +4,14 @@ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
 class StatsampleAnovaTwoWayWithVectorsTestCase < Minitest::Test
   context(Statsample::Anova::TwoWayWithVectors) do
     setup do
-      @pa = [5, 4, 3, 4, 2, 18, 19, 14, 12, 15, 6, 7, 5, 8, 4, 6, 9, 5, 9, 3].to_numeric
-      @pa.name = 'Passive Avoidance'
-      @a = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1].to_vector
-      @a.labels = { 0 => '0%', 1 => '35%' }
-      @a.name = 'Diet'
-      @b = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1].to_vector
-      @b.labels = { 0 => 'Young', 1 => 'Older' }
-      @b.name = 'Age'
+      @pa = Daru::Vector.new [5, 4, 3, 4, 2, 18, 19, 14, 12, 15, 6, 7, 5, 8, 4, 6, 9, 5, 9, 3]
+      @pa.rename 'Passive Avoidance'
+      @a = Daru::Vector.new [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
+      # @a.labels = { 0 => '0%', 1 => '35%' }
+      @a.rename 'Diet'
+      @b = Daru::Vector.new [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
+      # @b.labels = { 0 => 'Young', 1 => 'Older' }
+      @b.rename 'Age'
       @anova = Statsample::Anova::TwoWayWithVectors.new(a: @a, b: @b, dependent: @pa)
     end
     should 'Statsample::Anova respond to #twoway_with_vectors' do

data/test/test_anovawithvectors.rb CHANGED

@@ -3,9 +3,9 @@ class StatsampleAnovaOneWayWithVectorsTestCase < Minitest::Test
   context(Statsample::Anova::OneWayWithVectors) do
     context('when initializing') do
       setup do
-        @v1 = 10.times.map { rand(100) }.to_numeric
-        @v2 = 10.times.map { rand(100) }.to_numeric
-        @v3 = 10.times.map { rand(100) }.to_numeric
+        @v1 = Daru::Vector.new(10.times.map { rand(100) })
+        @v2 = Daru::Vector.new(10.times.map { rand(100) })
+        @v3 = Daru::Vector.new(10.times.map { rand(100) })
       end
       should 'be the same using [] or args*' do
         a1 = Statsample::Anova::OneWayWithVectors.new(@v1, @v2, @v3)
@@ -28,9 +28,9 @@ class StatsampleAnovaOneWayWithVectorsTestCase < Minitest::Test
       end
     end
     setup do
-      @v1 = [3, 3, 2, 3, 6].to_vector(:numeric)
-      @v2 = [7, 6, 5, 6, 7].to_vector(:numeric)
-      @v3 = [9, 8, 9, 7, 8].to_vector(:numeric)
+      @v1 = Daru::Vector.new([3, 3, 2, 3, 6])
+      @v2 = Daru::Vector.new([7, 6, 5, 6, 7])
+      @v3 = Daru::Vector.new([9, 8, 9, 7, 8])
       @name = 'Anova testing'
       @anova = Statsample::Anova::OneWayWithVectors.new(@v1, @v2, @v3, name: @name)
     end
@@ -66,10 +66,10 @@ class StatsampleAnovaOneWayWithVectorsTestCase < Minitest::Test
       assert_in_delta(@anova.sst, @anova.sswg + @anova.ssbg, 0.00001)
     end
     should 'df total equal to number of n-1' do
-      assert_equal(@v1.n + @v2.n + @v3.n - 1, @anova.df_total)
+      assert_equal(@v1.size + @v2.size + @v3.size - 1, @anova.df_total)
     end
     should 'df wg equal to number of n-k' do
-      assert_equal(@v1.n + @v2.n + @v3.n - 3, @anova.df_wg)
+      assert_equal(@v1.size + @v2.size + @v3.size - 3, @anova.df_wg)
     end
     should 'df bg equal to number of k-1' do
       assert_equal(2, @anova.df_bg)

data/test/test_awesome_print_bug.rb CHANGED

@@ -5,7 +5,7 @@ class StatsampleAwesomePrintBug < Minitest::Test
       require 'awesome_print'
     end
     should 'should be flawless' do
-      a = [1, 2, 3].to_numeric
+      a = Daru::Vector.new([1, 2, 3])
       assert(a != [1, 2, 3])
       assert_nothing_raised do

data/test/test_bartlettsphericity.rb CHANGED

@@ -4,11 +4,11 @@ class StatsampleBartlettSphericityTestCase < Minitest::Test
   include Statsample::Test
   context Statsample::Test::BartlettSphericity do
     setup do
-      @v1 = [1, 2, 3, 4, 7, 8, 9, 10, 14, 15, 20, 50, 60, 70].to_numeric
-      @v2 = [5, 6, 11, 12, 13, 16, 17, 18, 19, 20, 30, 0, 0, 0].to_numeric
-      @v3 = [10, 3, 20, 30, 40, 50, 80, 10, 20, 30, 40, 2, 3, 4].to_numeric
+      @v1 = Daru::Vector.new([1, 2, 3, 4, 7, 8, 9, 10, 14, 15, 20, 50, 60, 70])
+      @v2 = Daru::Vector.new([5, 6, 11, 12, 13, 16, 17, 18, 19, 20, 30, 0, 0, 0])
+      @v3 = Daru::Vector.new([10, 3, 20, 30, 40, 50, 80, 10, 20, 30, 40, 2, 3, 4])
       # KMO: 0.490
-      ds = { 'v1' => @v1, 'v2' => @v2, 'v3' => @v3 }.to_dataset
+      ds = Daru::DataFrame.new({ :v1 => @v1, :v2 => @v2, :v3 => @v3 })
       cor = Statsample::Bivariate.correlation_matrix(ds)
       @bs = Statsample::Test::BartlettSphericity.new(cor, 14)
     end

data/test/test_bivariate.rb CHANGED

@@ -1,38 +1,38 @@
 require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
 class StatsampleBivariateTestCase < Minitest::Test
   should 'method sum of squares should be correct' do
-    v1 = [1, 2, 3, 4, 5, 6].to_vector(:numeric)
-    v2 = [6, 2, 4, 10, 12, 8].to_vector(:numeric)
+    v1 = Daru::Vector.new([1, 2, 3, 4, 5, 6])
+    v2 = Daru::Vector.new([6, 2, 4, 10, 12, 8])
     assert_equal(23.0, Statsample::Bivariate.sum_of_squares(v1, v2))
   end
   should_with_gsl 'return same covariance with ruby and gls implementation' do
-    v1 = 20.times.collect { |_a| rand }.to_numeric
-    v2 = 20.times.collect { |_a| rand }.to_numeric
+    v1 = Daru::Vector.new(20.times.collect { |_a| rand })
+    v2 = Daru::Vector.new(20.times.collect { |_a| rand })
     assert_in_delta(Statsample::Bivariate.covariance(v1, v2), Statsample::Bivariate.covariance_slow(v1, v2), 0.001)
   end
   should_with_gsl 'return same correlation with ruby and gls implementation' do
-    v1 = 20.times.collect { |_a| rand }.to_numeric
-    v2 = 20.times.collect { |_a| rand }.to_numeric
+    v1 = Daru::Vector.new(20.times.collect { |_a| rand })
+    v2 = Daru::Vector.new(20.times.collect { |_a| rand })
-    assert_in_delta(GSL::Stats.correlation(v1.gsl, v2.gsl), Statsample::Bivariate.pearson_slow(v1, v2), 1e-10)
+    assert_in_delta(GSL::Stats.correlation(v1.to_gsl, v2.to_gsl), Statsample::Bivariate.pearson_slow(v1, v2), 1e-10)
   end
   should 'return correct pearson correlation' do
-    v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:numeric)
-    v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:numeric)
+    v1 = Daru::Vector.new([6, 5, 4, 7, 8, 4, 3, 2])
+    v2 = Daru::Vector.new([2, 3, 7, 8, 6, 4, 3, 2])
     assert_in_delta(0.525, Statsample::Bivariate.pearson(v1, v2), 0.001)
     assert_in_delta(0.525, Statsample::Bivariate.pearson_slow(v1, v2), 0.001)
-    v3 = [6, 2,  1000, 1000, 5, 4, 7, 8, 4, 3, 2, nil].to_vector(:numeric)
-    v4 = [2, nil, nil, nil,  3, 7, 8, 6, 4, 3, 2, 500].to_vector(:numeric)
+    v3 = Daru::Vector.new([6, 2,  1000, 1000, 5, 4, 7, 8, 4, 3, 2, nil])
+    v4 = Daru::Vector.new([2, nil, nil, nil,  3, 7, 8, 6, 4, 3, 2, 500])
     assert_in_delta(0.525, Statsample::Bivariate.pearson(v3, v4), 0.001)
     # Test ruby method
     v3a, v4a = Statsample.only_valid v3, v4
     assert_in_delta(0.525, Statsample::Bivariate.pearson_slow(v3a, v4a), 0.001)
   end
   should 'return correct values for t_pearson and prop_pearson' do
-    v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:numeric)
-    v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:numeric)
+    v1 = Daru::Vector.new([6, 5, 4, 7, 8, 4, 3, 2])
+    v2 = Daru::Vector.new([2, 3, 7, 8, 6, 4, 3, 2])
     r = Statsample::Bivariate::Pearson.new(v1, v2)
     assert_in_delta(0.525, r.r, 0.001)
     assert_in_delta(Statsample::Bivariate.t_pearson(v1, v2), r.t, 0.001)
@@ -40,11 +40,11 @@ class StatsampleBivariateTestCase < Minitest::Test
     assert(r.summary.size > 0)
   end
   should 'return correct correlation_matrix with nils values' do
-    v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:numeric)
-    v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:numeric)
-    v3 = [6, 2,  1000, 1000, 5, 4, 7, 8].to_vector(:numeric)
-    v4 = [2, nil, nil, nil,  3, 7, 8, 6].to_vector(:numeric)
-    ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4 }.to_dataset
+    v1 = Daru::Vector.new([6, 5, 4, 7, 8, 4, 3, 2])
+    v2 = Daru::Vector.new([2, 3, 7, 8, 6, 4, 3, 2])
+    v3 = Daru::Vector.new([6, 2,  1000, 1000, 5, 4, 7, 8])
+    v4 = Daru::Vector.new([2, nil, nil, nil,  3, 7, 8, 6])
+    ds = Daru::DataFrame.new({ :v1 => v1, :v2 => v2, :v3 => v3, :v4 => v4 })
     c = proc { |n1, n2| Statsample::Bivariate.pearson(n1, n2) }
     expected = Matrix[[c.call(v1, v1), c.call(v1, v2), c.call(v1, v3), c.call(v1, v4)], [c.call(v2, v1), c.call(v2, v2), c.call(v2, v3), c.call(v2, v4)], [c.call(v3, v1), c.call(v3, v2), c.call(v3, v3), c.call(v3, v4)],
                       [c.call(v4, v1), c.call(v4, v2), c.call(v4, v3), c.call(v4, v4)]
@@ -61,13 +61,13 @@ class StatsampleBivariateTestCase < Minitest::Test
   end
   should_with_gsl 'return same values for optimized and pairwise covariance matrix' do
     cases = 100
-    v1 = Statsample::Vector.new_numeric(cases) { rand }
-    v2 = Statsample::Vector.new_numeric(cases) { rand }
-    v3 = Statsample::Vector.new_numeric(cases) { rand }
-    v4 = Statsample::Vector.new_numeric(cases) { rand }
-    v5 = Statsample::Vector.new_numeric(cases) { rand }
+    v1 = Daru::Vector.new_with_size(cases) { rand }
+    v2 = Daru::Vector.new_with_size(cases) { rand }
+    v3 = Daru::Vector.new_with_size(cases) { rand }
+    v4 = Daru::Vector.new_with_size(cases) { rand }
+    v5 = Daru::Vector.new_with_size(cases) { rand }
-    ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4, 'v5' => v5 }.to_dataset
+    ds = Daru::DataFrame.new({ :v1 => v1, :v2 => v2, :v3 => v3, :v4 => v4, :v5 => v5 })
     cor_opt = Statsample::Bivariate.covariance_matrix_optimized(ds)
@@ -76,13 +76,14 @@ class StatsampleBivariateTestCase < Minitest::Test
   end
   should_with_gsl 'return same values for optimized and pairwise correlation matrix' do
     cases = 100
-    v1 = Statsample::Vector.new_numeric(cases) { rand }
-    v2 = Statsample::Vector.new_numeric(cases) { rand }
-    v3 = Statsample::Vector.new_numeric(cases) { rand }
-    v4 = Statsample::Vector.new_numeric(cases) { rand }
-    v5 = Statsample::Vector.new_numeric(cases) { rand }
+    v1 = Daru::Vector.new_with_size(cases) { rand }
+    v2 = Daru::Vector.new_with_size(cases) { rand }
+    v3 = Daru::Vector.new_with_size(cases) { rand }
+    v4 = Daru::Vector.new_with_size(cases) { rand }
+    v5 = Daru::Vector.new_with_size(cases) { rand }
-    ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4, 'v5' => v5 }.to_dataset
+    ds = Daru::DataFrame.new({
+      :v1 => v1, :v2 => v2, :v3 => v3, :v4 => v4, :v5 => v5 })
     cor_opt = Statsample::Bivariate.correlation_matrix_optimized(ds)
@@ -90,11 +91,11 @@ class StatsampleBivariateTestCase < Minitest::Test
     assert_equal_matrix(cor_opt, cor_pw, 1e-15)
   end
   should 'return correct correlation_matrix without nils values' do
-    v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:numeric)
-    v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:numeric)
-    v3 = [6, 2,  1000, 1000, 5, 4, 7, 8].to_vector(:numeric)
-    v4 = [2, 4, 6, 7,  3, 7, 8, 6].to_vector(:numeric)
-    ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4 }.to_dataset
+    v1 = Daru::Vector.new([6, 5, 4, 7, 8, 4, 3, 2])
+    v2 = Daru::Vector.new([2, 3, 7, 8, 6, 4, 3, 2])
+    v3 = Daru::Vector.new([6, 2,  1000, 1000, 5, 4, 7, 8])
+    v4 = Daru::Vector.new([2, 4, 6, 7,  3, 7, 8, 6])
+    ds = Daru::DataFrame.new({ :v1 => v1, :v2 => v2, :v3 => v3, :v4 => v4 })
     c = proc { |n1, n2| Statsample::Bivariate.pearson(n1, n2) }
     expected = Matrix[[c.call(v1, v1), c.call(v1, v2), c.call(v1, v3), c.call(v1, v4)], [c.call(v2, v1), c.call(v2, v2), c.call(v2, v3), c.call(v2, v4)], [c.call(v3, v1), c.call(v3, v2), c.call(v3, v3), c.call(v3, v4)],
                       [c.call(v4, v1), c.call(v4, v2), c.call(v4, v3), c.call(v4, v4)]
@@ -129,25 +130,25 @@ class StatsampleBivariateTestCase < Minitest::Test
   end
   should "return correct value for Spearman's rho" do
-    v1 = [86, 97, 99, 100, 101, 103, 106, 110, 112, 113].to_vector(:numeric)
-    v2 = [0, 20, 28, 27, 50, 29, 7, 17, 6, 12].to_vector(:numeric)
+    v1 =Daru::Vector.new( [86, 97, 99, 100, 101, 103, 106, 110, 112, 113])
+    v2 =Daru::Vector.new( [0, 20, 28, 27, 50, 29, 7, 17, 6, 12])
     assert_in_delta(-0.175758, Statsample::Bivariate.spearman(v1, v2), 0.0001)
   end
   should 'return correct value for point_biserial correlation' do
-    c = [1, 3, 5, 6, 7, 100, 200, 300, 400, 300].to_vector(:numeric)
-    d = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0].to_vector(:numeric)
+    c = Daru::Vector.new([1, 3, 5, 6, 7, 100, 200, 300, 400, 300])
+    d = Daru::Vector.new([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
     assert_raises TypeError do
       Statsample::Bivariate.point_biserial(c, d)
     end
     assert_in_delta(Statsample::Bivariate.point_biserial(d, c), Statsample::Bivariate.pearson(d, c), 0.0001)
   end
   should 'return correct value for tau_a and tau_b' do
-    v1 = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11].to_vector(:numeric)
-    v2 = [1, 3, 4, 5, 7, 8, 2, 9, 10, 6, 11].to_vector(:numeric)
+    v1 = Daru::Vector.new([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
+    v2 = Daru::Vector.new([1, 3, 4, 5, 7, 8, 2, 9, 10, 6, 11])
     assert_in_delta(0.6727, Statsample::Bivariate.tau_a(v1, v2), 0.001)
     assert_in_delta(0.6727, Statsample::Bivariate.tau_b((Statsample::Crosstab.new(v1, v2).to_matrix)), 0.001)
-    v1 = [12, 14, 14, 17, 19, 19, 19, 19, 19, 20, 21, 21, 21, 21, 21, 22, 23, 24, 24, 24, 26, 26, 27].to_vector(:numeric)
-    v2 = [11, 4, 4, 2, 0, 0, 0, 0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0].to_vector(:numeric)
+    v1 = Daru::Vector.new([12, 14, 14, 17, 19, 19, 19, 19, 19, 20, 21, 21, 21, 21, 21, 22, 23, 24, 24, 24, 26, 26, 27])
+    v2 = Daru::Vector.new([11, 4, 4, 2, 0, 0, 0, 0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0])
     assert_in_delta(-0.376201540231705, Statsample::Bivariate.tau_b(Statsample::Crosstab.new(v1, v2).to_matrix), 0.001)
   end
   should 'return correct value for gamma correlation' do
@@ -156,4 +157,8 @@ class StatsampleBivariateTestCase < Minitest::Test
     m2 = Matrix[[15, 12, 6, 5], [12, 8, 10, 8], [4, 6, 9, 10]]
     assert_in_delta(0.349, Statsample::Bivariate.gamma(m2), 0.001)
   end
+  should 'return correct residuals' do
+    # TODO: test Statsample::Bivariate.residuals
+  end
 end

data/test/test_codification.rb CHANGED

@@ -1,33 +1,33 @@
 require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
 class StatsampleCodificationTestCase < Minitest::Test
   def initialize(*args)
-    v1 = %w(run walk,run walking running sleep sleeping,dreaming sleep,dream).to_vector
+    v1 = Daru::Vector.new(%w(run walk,run walking running sleep sleeping,dreaming sleep,dream))
     @dict = { 'run' => 'r', 'walk' => 'w', 'walking' => 'w', 'running' => 'r', 'sleep' => 's', 'sleeping' => 's', 'dream' => 'd', 'dreaming' => 'd' }
-    @ds = { 'v1' => v1 }.to_dataset
+    @ds = Daru::DataFrame.new({ :v1 => v1 })
     super
   end
   def test_create_hash
     expected_keys_v1 = %w(run walk walking running sleep sleeping dream dreaming).sort
-    hash = Statsample::Codification.create_hash(@ds, ['v1'])
-    assert_equal(['v1'], hash.keys)
-    assert_equal(expected_keys_v1, hash['v1'].keys.sort)
-    assert_equal(expected_keys_v1, hash['v1'].values.sort)
+    hash = Statsample::Codification.create_hash(@ds, [:v1])
+    assert_equal([:v1], hash.keys)
+    assert_equal(expected_keys_v1, hash[:v1].keys.sort)
+    assert_equal(expected_keys_v1, hash[:v1].values.sort)
   end
   def test_create_excel
     filename = Dir.tmpdir + '/test_excel' + Time.now.to_s + '.xls'
     # filename = Tempfile.new("test_codification_"+Time.now().to_s)
     Statsample::Codification.create_excel(@ds, ['v1'], filename)
-    field = (['v1'] * 8).to_vector
-    keys = %w(dream dreaming run running sleep sleeping walk walking).to_vector
-    ds = Statsample::Excel.read(filename)
-    assert_equal(field, ds['field'])
-    assert_equal(keys, ds['original'])
-    assert_equal(keys, ds['recoded'])
+    field = Daru::Vector.new(['v1'] * 8, name: :field)
+    keys = Daru::Vector.new(%w(dream dreaming run running sleep sleeping walk walking))
+    ds = Daru::DataFrame.from_excel(filename)
+    assert_equal(field, ds[:field])
+    assert_equal(keys, ds[:original])
+    assert_equal(keys, ds[:recoded])
     hash = Statsample::Codification.excel_to_recoded_hash(filename)
-    assert_equal(keys.data, hash['v1'].keys.sort)
-    assert_equal(keys.data, hash['v1'].values.sort)
+    assert_equal(keys.to_a, hash[:v1].keys.sort)
+    assert_equal(keys.to_a, hash[:v1].values.sort)
   end
   def test_create_yaml
@@ -35,44 +35,44 @@ class StatsampleCodificationTestCase < Minitest::Test
       Statsample::Codification.create_yaml(@ds, [])
     end
     expected_keys_v1 = %w(run walk walking running sleep sleeping dream dreaming).sort
-    yaml_hash = Statsample::Codification.create_yaml(@ds, ['v1'])
+    yaml_hash = Statsample::Codification.create_yaml(@ds, [:v1])
     h = YAML.load(yaml_hash)
-    assert_equal(['v1'], h.keys)
-    assert_equal(expected_keys_v1, h['v1'].keys.sort)
+    assert_equal([:v1], h.keys)
+    assert_equal(expected_keys_v1, h[:v1].keys.sort)
     tf = Tempfile.new('test_codification')
-    yaml_hash = Statsample::Codification.create_yaml(@ds, ['v1'], tf, Statsample::SPLIT_TOKEN)
+    yaml_hash = Statsample::Codification.create_yaml(@ds, [:v1], tf, Statsample::SPLIT_TOKEN)
     tf.close
     tf.open
     h = YAML.load(tf)
-    assert_equal(['v1'], h.keys)
-    assert_equal(expected_keys_v1, h['v1'].keys.sort)
+    assert_equal([:v1], h.keys)
+    assert_equal(expected_keys_v1, h[:v1].keys.sort)
     tf.close(true)
   end
   def test_recodification
     expected = [['r'], %w(w r), ['w'], ['r'], ['s'], %w(s d), %w(s d)]
-    assert_equal(expected, Statsample::Codification.recode_vector(@ds['v1'], @dict))
-    v2 = ['run', 'walk,dreaming', nil, 'walk,dream,dreaming,walking'].to_vector
+    assert_equal(expected, Statsample::Codification.recode_vector(@ds[:v1], @dict))
+    v2 = Daru::Vector.new(['run', 'walk,dreaming', nil, 'walk,dream,dreaming,walking'])
     expected = [['r'], %w(w d), nil, %w(w d)]
     assert_equal(expected, Statsample::Codification.recode_vector(v2, @dict))
   end
   def test_recode_dataset_simple
-    Statsample::Codification.recode_dataset_simple!(@ds, 'v1' => @dict)
-    expected_vector = ['r', 'w,r', 'w', 'r', 's', 's,d', 's,d'].to_vector
-    assert_not_equal(expected_vector, @ds['v1'])
-    assert_equal(expected_vector, @ds['v1_recoded'])
+    Statsample::Codification.recode_dataset_simple!(@ds, :v1 => @dict)
+    expected_vector = Daru::Vector.new(['r', 'w,r', 'w', 'r', 's', 's,d', 's,d'])
+    assert_not_equal(expected_vector, @ds[:v1])
+    assert_equal(expected_vector, @ds[:v1_recoded])
   end
   def test_recode_dataset_split
-    Statsample::Codification.recode_dataset_split!(@ds, 'v1' => @dict)
+    Statsample::Codification.recode_dataset_split!(@ds, :v1 => @dict)
     e = {}
-    e['r'] = [1, 1, 0, 1, 0, 0, 0].to_vector
-    e['w'] = [0, 1, 1, 0, 0, 0, 0].to_vector
-    e['s'] = [0, 0, 0, 0, 1, 1, 1].to_vector
-    e['d'] = [0, 0, 0, 0, 0, 1, 1].to_vector
-    e.each{|k, expected|
-      assert_equal(expected, @ds['v1_' + k], "Error on key #{k}")
+    e['r'] = Daru::Vector.new([1, 1, 0, 1, 0, 0, 0])
+    e['w'] = Daru::Vector.new([0, 1, 1, 0, 0, 0, 0])
+    e['s'] = Daru::Vector.new([0, 0, 0, 0, 1, 1, 1])
+    e['d'] = Daru::Vector.new([0, 0, 0, 0, 0, 1, 1])
+    e.each { |k, expected|
+      assert_equal(expected, @ds[('v1_' + k).to_sym], "Error on key #{k}")
     }
   end
 end