RubyGems - statsample - Versions diffs - 1.5.0 → 2.0.0 - Mend

statsample 1.5.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (114)

checksums.yaml +4 -4
data/.build.sh +15 -0
data/.gitignore +1 -0
data/.travis.yml +19 -7
data/CONTRIBUTING.md +33 -0
data/History.txt +5 -0
data/README.md +41 -53
data/benchmarks/correlation_matrix_15_variables.rb +6 -5
data/benchmarks/correlation_matrix_5_variables.rb +6 -5
data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +23 -26
data/examples/boxplot.rb +17 -5
data/examples/correlation_matrix.rb +36 -7
data/examples/dataset.rb +25 -5
data/examples/dominance_analysis.rb +8 -7
data/examples/dominance_analysis_bootstrap.rb +16 -11
data/examples/histogram.rb +16 -2
data/examples/icc.rb +5 -6
data/examples/levene.rb +17 -3
data/examples/multiple_regression.rb +6 -3
data/examples/parallel_analysis.rb +11 -6
data/examples/polychoric.rb +26 -13
data/examples/principal_axis.rb +8 -4
data/examples/reliability.rb +10 -10
data/examples/scatterplot.rb +8 -0
data/examples/t_test.rb +7 -0
data/examples/u_test.rb +10 -2
data/examples/vector.rb +9 -6
data/examples/velicer_map_test.rb +12 -8
data/lib/statsample.rb +13 -47
data/lib/statsample/analysis/suite.rb +1 -1
data/lib/statsample/anova/oneway.rb +6 -6
data/lib/statsample/anova/twoway.rb +26 -24
data/lib/statsample/bivariate.rb +78 -61
data/lib/statsample/bivariate/pearson.rb +2 -2
data/lib/statsample/codification.rb +45 -32
data/lib/statsample/converter/csv.rb +15 -53
data/lib/statsample/converter/spss.rb +6 -5
data/lib/statsample/converters.rb +50 -211
data/lib/statsample/crosstab.rb +26 -25
data/lib/statsample/daru.rb +117 -0
data/lib/statsample/dataset.rb +70 -942
data/lib/statsample/dominanceanalysis.rb +16 -17
data/lib/statsample/dominanceanalysis/bootstrap.rb +26 -28
data/lib/statsample/factor/parallelanalysis.rb +17 -19
data/lib/statsample/factor/pca.rb +21 -20
data/lib/statsample/factor/principalaxis.rb +3 -3
data/lib/statsample/graph/boxplot.rb +8 -16
data/lib/statsample/graph/histogram.rb +4 -4
data/lib/statsample/graph/scatterplot.rb +8 -7
data/lib/statsample/histogram.rb +128 -119
data/lib/statsample/matrix.rb +20 -16
data/lib/statsample/multiset.rb +39 -38
data/lib/statsample/regression.rb +3 -3
data/lib/statsample/regression/multiple.rb +8 -10
data/lib/statsample/regression/multiple/alglibengine.rb +96 -89
data/lib/statsample/regression/multiple/baseengine.rb +32 -32
data/lib/statsample/regression/multiple/gslengine.rb +33 -36
data/lib/statsample/regression/multiple/matrixengine.rb +7 -9
data/lib/statsample/regression/multiple/rubyengine.rb +39 -41
data/lib/statsample/reliability.rb +23 -25
data/lib/statsample/reliability/icc.rb +8 -7
data/lib/statsample/reliability/multiscaleanalysis.rb +14 -12
data/lib/statsample/reliability/scaleanalysis.rb +58 -60
data/lib/statsample/reliability/skillscaleanalysis.rb +34 -29
data/lib/statsample/resample.rb +1 -1
data/lib/statsample/shorthand.rb +29 -25
data/lib/statsample/test/kolmogorovsmirnov.rb +5 -3
data/lib/statsample/test/levene.rb +28 -27
data/lib/statsample/test/t.rb +7 -9
data/lib/statsample/test/umannwhitney.rb +28 -28
data/lib/statsample/test/wilcoxonsignedrank.rb +45 -43
data/lib/statsample/vector.rb +70 -1013
data/lib/statsample/version.rb +1 -1
data/statsample.gemspec +12 -16
data/test/helpers_tests.rb +1 -1
data/test/test_analysis.rb +17 -17
data/test/test_anova_contrast.rb +6 -6
data/test/test_anovatwowaywithdataset.rb +8 -8
data/test/test_anovawithvectors.rb +8 -8
data/test/test_awesome_print_bug.rb +1 -1
data/test/test_bartlettsphericity.rb +4 -4
data/test/test_bivariate.rb +48 -43
data/test/test_codification.rb +33 -33
data/test/test_crosstab.rb +9 -9
data/test/test_dataset.rb +28 -458
data/test/test_factor.rb +46 -38
data/test/test_factor_pa.rb +22 -13
data/test/test_ggobi.rb +4 -4
data/test/test_gsl.rb +4 -4
data/test/test_histogram.rb +3 -3
data/test/test_matrix.rb +13 -13
data/test/test_multiset.rb +103 -91
data/test/test_regression.rb +57 -52
data/test/test_reliability.rb +55 -45
data/test/test_reliability_icc.rb +8 -8
data/test/test_reliability_skillscale.rb +26 -24
data/test/test_resample.rb +1 -1
data/test/test_statistics.rb +3 -13
data/test/test_stest.rb +9 -9
data/test/test_stratified.rb +3 -3
data/test/test_test_t.rb +12 -12
data/test/test_umannwhitney.rb +2 -2
data/test/test_vector.rb +76 -613
data/test/test_wilcoxonsignedrank.rb +4 -4
metadata +57 -28
data/lib/statsample/rserve_extension.rb +0 -20
data/lib/statsample/vector/gsl.rb +0 -106
data/test/fixtures/repeated_fields.csv +0 -7
data/test/fixtures/scientific_notation.csv +0 -4
data/test/fixtures/test_csv.csv +0 -7
data/test/fixtures/test_xls.xls +0 -0
data/test/test_csv.rb +0 -63
data/test/test_rserve_extension.rb +0 -42
data/test/test_xls.rb +0 -52

data/test/test_regression.rb CHANGED

@@ -3,21 +3,21 @@ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
 class StatsampleRegressionTestCase < Minitest::Test
   context 'Example with missing data' do
     setup do
-      @x = [0.285714285714286, 0.114285714285714, 0.314285714285714, 0.2, 0.2, 0.228571428571429, 0.2, 0.4, 0.714285714285714, 0.285714285714286, 0.285714285714286, 0.228571428571429, 0.485714285714286, 0.457142857142857, 0.257142857142857, 0.228571428571429, 0.285714285714286, 0.285714285714286, 0.285714285714286, 0.142857142857143, 0.285714285714286, 0.514285714285714, 0.485714285714286, 0.228571428571429, 0.285714285714286, 0.342857142857143, 0.285714285714286, 0.0857142857142857].to_numeric
+      @x = Daru::Vector.new([0.285714285714286, 0.114285714285714, 0.314285714285714, 0.2, 0.2, 0.228571428571429, 0.2, 0.4, 0.714285714285714, 0.285714285714286, 0.285714285714286, 0.228571428571429, 0.485714285714286, 0.457142857142857, 0.257142857142857, 0.228571428571429, 0.285714285714286, 0.285714285714286, 0.285714285714286, 0.142857142857143, 0.285714285714286, 0.514285714285714, 0.485714285714286, 0.228571428571429, 0.285714285714286, 0.342857142857143, 0.285714285714286, 0.0857142857142857])
-      @y = [nil, 0.233333333333333, nil, 0.266666666666667, 0.366666666666667, nil, 0.333333333333333, 0.3, 0.666666666666667, 0.0333333333333333, 0.333333333333333, nil, nil, 0.533333333333333, 0.433333333333333, 0.4, 0.4, 0.5, 0.4, 0.266666666666667, 0.166666666666667, 0.666666666666667, 0.433333333333333, 0.166666666666667, nil, 0.4, 0.366666666666667, nil].to_numeric
-      @ds = { 'x' => @x, 'y' => @y }.to_dataset
-      @lr = Statsample::Regression::Multiple::RubyEngine.new(@ds, 'y')
+      @y = Daru::Vector.new([nil, 0.233333333333333, nil, 0.266666666666667, 0.366666666666667, nil, 0.333333333333333, 0.3, 0.666666666666667, 0.0333333333333333, 0.333333333333333, nil, nil, 0.533333333333333, 0.433333333333333, 0.4, 0.4, 0.5, 0.4, 0.266666666666667, 0.166666666666667, 0.666666666666667, 0.433333333333333, 0.166666666666667, nil, 0.4, 0.366666666666667, nil])
+      @ds = Daru::DataFrame.new({ :x => @x, :y => @y })
+      @lr = Statsample::Regression::Multiple::RubyEngine.new(@ds, :y)
     end
     should 'have correct values' do
       assert_in_delta(0.455, @lr.r2, 0.001)
       assert_in_delta(0.427, @lr.r2_adjusted, 0.001)
       assert_in_delta(0.1165, @lr.se_estimate, 0.001)
       assert_in_delta(15.925, @lr.f, 0.0001)
-      assert_in_delta(0.675, @lr.standarized_coeffs['x'], 0.001)
-      assert_in_delta(0.778, @lr.coeffs['x'], 0.001, 'coeff x')
+      assert_in_delta(0.675, @lr.standarized_coeffs[:x], 0.001)
+      assert_in_delta(0.778, @lr.coeffs[:x], 0.001, 'coeff x')
       assert_in_delta(0.132, @lr.constant, 0.001, 'constant')
-      assert_in_delta(0.195, @lr.coeffs_se['x'], 0.001, 'coeff x se')
+      assert_in_delta(0.195, @lr.coeffs_se[:x], 0.001, 'coeff x se')
       assert_in_delta(0.064, @lr.constant_se, 0.001, 'constant se')
     end
   end
@@ -26,24 +26,24 @@ class StatsampleRegressionTestCase < Minitest::Test
     a, b = rand, rand
-    x1 = samples.times.map { rand }.to_numeric
-    x2 = samples.times.map { rand }.to_numeric
-    x3 = samples.times.map { |i| x1[i] * (1 + a) + x2[i] * (1 + b) }.to_numeric
-    y = samples.times.map { |i| x1[i] + x2[i] + x3[i] + rand }.to_numeric
+    x1 = Daru::Vector.new(samples.times.map { rand })
+    x2 = Daru::Vector.new(samples.times.map { rand })
+    x3 = Daru::Vector.new(samples.times.map { |i| x1[i] * (1 + a) + x2[i] * (1 + b) })
+    y  = Daru::Vector.new(samples.times.map { |i| x1[i] + x2[i] + x3[i] + rand })
-    ds = { 'x1' => x1, 'x2' => x2, 'x3' => x3, 'y' => y }.to_dataset
+    ds = Daru::DataFrame.new({ :x1 => x1, :x2 => x2, :x3 => x3, :y => y })
     assert_raise(Statsample::Regression::LinearDependency) {
-      Statsample::Regression::Multiple::RubyEngine.new(ds, 'y')
+      Statsample::Regression::Multiple::RubyEngine.new(ds, :y)
     }
   end
   def test_parameters
-    @x = [13, 20, 10, 33, 15].to_vector(:numeric)
-    @y = [23, 18, 35, 10, 27].to_vector(:numeric)
+    @x =Daru::Vector.new([13, 20, 10, 33, 15])
+    @y =Daru::Vector.new([23, 18, 35, 10, 27])
     reg = Statsample::Regression::Simple.new_from_vectors(@x, @y)
     _test_simple_regression(reg)
-    ds = { 'x' => @x, 'y' => @y }.to_dataset
-    reg = Statsample::Regression::Simple.new_from_dataset(ds, 'x', 'y')
+    ds = Daru::DataFrame.new({ :x => @x, :y => @y })
+    reg = Statsample::Regression::Simple.new_from_dataset(ds, :x, :y)
     _test_simple_regression(reg)
     reg = Statsample::Regression.simple(@x, @y)
     _test_simple_regression(reg)
@@ -57,11 +57,11 @@ class StatsampleRegressionTestCase < Minitest::Test
   end
   def test_summaries
-    a = 10.times.map { rand(100) }.to_numeric
-    b = 10.times.map { rand(100) }.to_numeric
-    y = 10.times.map { rand(100) }.to_numeric
-    ds = { 'a' => a, 'b' => b, 'y' => y }.to_dataset
-    lr = Statsample::Regression::Multiple::RubyEngine.new(ds, 'y')
+    a = Daru::Vector.new(10.times.map { rand(100) })
+    b = Daru::Vector.new(10.times.map { rand(100) })
+    y = Daru::Vector.new(10.times.map { rand(100) })
+    ds = Daru::DataFrame.new({ :a => a, :b => b, :y => y })
+    lr = Statsample::Regression::Multiple::RubyEngine.new(ds, :y)
     assert(lr.summary.size > 0)
   end
@@ -87,12 +87,12 @@ class StatsampleRegressionTestCase < Minitest::Test
   end
   def test_multiple_regression_pairwise_2
-    @a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 3, nil, 3, nil, 3].to_vector(:numeric)
-    @b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4, 2, 2, nil, 6, 2].to_vector(:numeric)
-    @c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100, nil, 3, 7, nil, 7].to_vector(:numeric)
-    @y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 30, 40, nil, 50, nil].to_vector(:numeric)
-    ds = { 'a' => @a, 'b' => @b, 'c' => @c, 'y' => @y }.to_dataset
-    lr = Statsample::Regression::Multiple::RubyEngine.new(ds, 'y')
+    @a =Daru::Vector.new( [1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 3, nil, 3, nil, 3])
+    @b =Daru::Vector.new( [3, 3, 4, 4, 5, 5, 6, 6, 4, 4, 2, 2, nil, 6, 2])
+    @c =Daru::Vector.new( [11, 22, 30, 40, 50, 65, 78, 79, 99, 100, nil, 3, 7, nil, 7])
+    @y =Daru::Vector.new( [3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 30, 40, nil, 50, nil])
+    ds = Daru::DataFrame.new({ :a => @a, :b => @b, :c => @c, :y => @y })
+    lr = Statsample::Regression::Multiple::RubyEngine.new(ds, :y)
     assert_in_delta(2407.436, lr.sst, 0.001)
     assert_in_delta(0.752, lr.r, 0.001, 'pairwise r')
     assert_in_delta(0.565, lr.r2, 0.001)
@@ -103,12 +103,12 @@ class StatsampleRegressionTestCase < Minitest::Test
   def test_multiple_regression_gsl
     if Statsample.has_gsl?
-      @a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7].to_vector(:numeric)
-      @b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4].to_vector(:numeric)
-      @c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100].to_vector(:numeric)
-      @y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30].to_vector(:numeric)
-      ds = { 'a' => @a, 'b' => @b, 'c' => @c, 'y' => @y }.to_dataset
-      lr = Statsample::Regression::Multiple::GslEngine.new(ds, 'y')
+      @a =Daru::Vector.new( [1, 3, 2, 4, 3, 5, 4, 6, 5, 7])
+      @b =Daru::Vector.new( [3, 3, 4, 4, 5, 5, 6, 6, 4, 4])
+      @c =Daru::Vector.new( [11, 22, 30, 40, 50, 65, 78, 79, 99, 100])
+      @y =Daru::Vector.new( [3, 4, 5, 6, 7, 8, 9, 10, 20, 30])
+      ds = Daru::DataFrame.new({ :a => @a, :b => @b, :c => @c, :y => @y })
+      lr = Statsample::Regression::Multiple::GslEngine.new(ds, :y)
       assert(lr.summary.size > 0)
       model_test(lr, 'gsl')
       predicted = [1.7857, 6.0989, 3.2433, 7.2908, 4.9667, 10.3428, 8.8158, 10.4717, 23.6639, 25.3198]
@@ -127,8 +127,8 @@ class StatsampleRegressionTestCase < Minitest::Test
   end
   def model_test_matrix(lr, name = 'undefined')
-    stan_coeffs = { 'a' => 0.151, 'b' => -0.547, 'c' => 0.997 }
-    unstan_coeffs = { 'a' => 0.695, 'b' => -4.286, 'c' => 0.266 }
+    stan_coeffs = { :a => 0.151, :b => -0.547, :c => 0.997 }
+    unstan_coeffs = { :a => 0.695, :b => -4.286, :c => 0.266 }
     unstan_coeffs.each_key{|k|
       assert_in_delta(unstan_coeffs[k], lr.coeffs[k], 0.001, "b coeffs - #{name}")
@@ -145,15 +145,15 @@ class StatsampleRegressionTestCase < Minitest::Test
     assert_in_delta(20.908, lr.f, 0.001)
     assert_in_delta(0.001, lr.probability, 0.001)
-    assert_in_delta(0.226, lr.tolerance('a'), 0.001)
+    assert_in_delta(0.226, lr.tolerance(:a), 0.001)
-    coeffs_se = { 'a' => 1.171, 'b' => 1.129, 'c' => 0.072 }
+    coeffs_se = { :a => 1.171, :b => 1.129, :c => 0.072 }
     ccoeffs_se = lr.coeffs_se
     coeffs_se.each_key{|k|
       assert_in_delta(coeffs_se[k], ccoeffs_se[k], 0.001)
     }
-    coeffs_t = { 'a' => 0.594, 'b' => -3.796, 'c' => 3.703 }
+    coeffs_t = { :a => 0.594, :b => -3.796, :c => 3.703 }
     ccoeffs_t = lr.coeffs_t
     coeffs_t.each_key{|k|
       assert_in_delta(coeffs_t[k], ccoeffs_t[k], 0.001)
@@ -174,32 +174,37 @@ class StatsampleRegressionTestCase < Minitest::Test
   end
   def test_regression_matrix
-    @a = [1, 3, 2, 4, 3, 5, 4, 6, 5, 7].to_vector(:numeric)
-    @b = [3, 3, 4, 4, 5, 5, 6, 6, 4, 4].to_vector(:numeric)
-    @c = [11, 22, 30, 40, 50, 65, 78, 79, 99, 100].to_vector(:numeric)
-    @y = [3, 4, 5, 6, 7, 8, 9, 10, 20, 30].to_vector(:numeric)
-    ds = { 'a' => @a, 'b' => @b, 'c' => @c, 'y' => @y }.to_dataset
+    @a = Daru::Vector.new([1, 3, 2, 4, 3, 5, 4, 6, 5, 7])
+    @b = Daru::Vector.new([3, 3, 4, 4, 5, 5, 6, 6, 4, 4])
+    @c = Daru::Vector.new([11, 22, 30, 40, 50, 65, 78, 79, 99, 100])
+    @y = Daru::Vector.new([3, 4, 5, 6, 7, 8, 9, 10, 20, 30])
+    ds = Daru::DataFrame.new({ :a => @a, :b => @b, :c => @c, :y => @y })
     cor = Statsample::Bivariate.correlation_matrix(ds)
-    lr = Statsample::Regression::Multiple::MatrixEngine.new(cor, 'y', y_mean: @y.mean, x_mean: { 'a' => ds['a'].mean, 'b' => ds['b'].mean, 'c' => ds['c'].mean }, cases: @a.size, y_sd: @y.sd, x_sd: { 'a' => @a.sd, 'b' => @b.sd, 'c' => @c.sd })
+    lr = Statsample::Regression::Multiple::MatrixEngine.new(
+      cor, :y, y_mean: @y.mean,
+      x_mean: { :a => ds[:a].mean, :b => ds[:b].mean, :c => ds[:c].mean },
+      cases: @a.size, y_sd: @y.sd, x_sd: { :a => @a.sd, :b => @b.sd, :c => @c.sd })
     assert_nil(lr.constant_se)
     assert_nil(lr.constant_t)
     model_test_matrix(lr, 'correlation matrix')
     covariance = Statsample::Bivariate.covariance_matrix(ds)
-    lr = Statsample::Regression::Multiple::MatrixEngine.new(covariance, 'y', y_mean: @y.mean, x_mean: { 'a' => ds['a'].mean, 'b' => ds['b'].mean, 'c' => ds['c'].mean }, cases: @a.size)
+    lr = Statsample::Regression::Multiple::MatrixEngine.new(
+      covariance, :y, y_mean: @y.mean,
+      x_mean: { :a => ds[:a].mean, :b => ds[:b].mean, :c => ds[:c].mean }, cases: @a.size)
     assert(lr.summary.size > 0)
     model_test(lr, 'covariance matrix')
   end
   def test_regression_rubyengine
-    @a = [nil, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7].to_vector(:numeric)
-    @b = [nil, 3, 3, 4, 4, 5, 5, 6, 6, 4, 4].to_vector(:numeric)
-    @c = [nil, 11, 22, 30, 40, 50, 65, 78, 79, 99, 100].to_vector(:numeric)
-    @y = [nil, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30].to_vector(:numeric)
-    ds = { 'a' => @a, 'b' => @b, 'c' => @c, 'y' => @y }.to_dataset
-    lr = Statsample::Regression::Multiple::RubyEngine.new(ds, 'y')
+    @a = Daru::Vector.new([nil, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7])
+    @b = Daru::Vector.new([nil, 3, 3, 4, 4, 5, 5, 6, 6, 4, 4])
+    @c = Daru::Vector.new([nil, 11, 22, 30, 40, 50, 65, 78, 79, 99, 100])
+    @y = Daru::Vector.new([nil, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30])
+    ds = Daru::DataFrame.new({ :a => @a, :b => @b, :c => @c, :y => @y })
+    lr = Statsample::Regression::Multiple::RubyEngine.new(ds, :y)
     assert_equal(11, lr.total_cases)
     assert_equal(10, lr.valid_cases)
     model_test(lr, 'rubyengine with missing data')

data/test/test_reliability.rb CHANGED

@@ -1,6 +1,14 @@
 require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
 class StatsampleReliabilityTestCase < Minitest::Test
   context Statsample::Reliability do
+    setup do
+      Daru.lazy_update = true
+    end
+    teardown do
+      Daru.lazy_update = false
+    end
     should 'return correct r according to Spearman-Brown prophecy' do
       r = 0.6849
       n = 62.quo(15)
@@ -15,26 +23,27 @@ class StatsampleReliabilityTestCase < Minitest::Test
       setup do
         @samples = 40
         @n_variables = rand(10) + 2
-        @ds = Statsample::Dataset.new
-        base = @samples.times.collect { |_a| rand }.to_numeric
+        @ds = Daru::DataFrame.new({}, index: @samples)
+        base = Daru::Vector.new(@samples.times.collect { |_a| rand })
         @n_variables.times do |i|
-          @ds[i] = base.collect { |v| v + rand }.to_numeric
+          @ds[i] = Daru::Vector.new(base.collect { |v| v + rand })
         end
-        @ds.update_valid_data
-        @k = @ds.fields.size
+        @ds.update
+        @k = @ds.ncols
         @cm = Statsample::Bivariate.covariance_matrix(@ds)
         @dse = @ds.dup
-        @dse.fields.each do |f|
-          @dse[f] = @dse[f].standarized
+        @dse.vectors.each do |f|
+          @dse[f] = @dse[f].standardize
         end
+        @dse.update
         @cme = Statsample::Bivariate.covariance_matrix(@dse)
         @a = Statsample::Reliability.cronbach_alpha(@ds)
         @as = Statsample::Reliability.cronbach_alpha_standarized(@ds)
       end
       should 'alpha will be equal to sum of matrix covariance less the individual variances' do
         total_sum = @cm.total_sum
-        ind_var = @ds.fields.inject(0) { |ac, v| ac + @ds[v].variance }
+        ind_var = @ds.vectors.to_a.inject(0) { |ac, v| ac + @ds[v].variance }
         expected = @k.quo(@k - 1) * (1 - (ind_var.quo(total_sum)))
         assert_in_delta(expected, @a, 1e-10)
       end
@@ -57,7 +66,7 @@ class StatsampleReliabilityTestCase < Minitest::Test
       should 'standarized alpha will be equal to sum of matrix covariance less the individual variances on standarized values' do
         total_sum = @cme.total_sum
-        ind_var = @dse.fields.inject(0) { |ac, v| ac + @dse[v].variance }
+        ind_var = @dse.vectors.to_a.inject(0) { |ac, v| ac + @dse[v].variance }
         expected = @k.quo(@k - 1) * (1 - (ind_var.quo(total_sum)))
         assert_in_delta(expected, @as, 1e-10)
       end
@@ -67,31 +76,31 @@ class StatsampleReliabilityTestCase < Minitest::Test
         @samples = 100
         @points = rand(10) + 3
         @max_point = (@points - 1) * 3
-        @x1 = @samples.times.map { rand(@points) }.to_numeric
-        @x2 = @samples.times.map { rand(@points) }.to_numeric
-        @x3 = @samples.times.map { rand(@points) }.to_numeric
-        @ds = { 'a' => @x1, 'b' => @x2, 'c' => @x3 }.to_dataset
+        @x1 = Daru::Vector.new(@samples.times.map { rand(@points) })
+        @x2 = Daru::Vector.new(@samples.times.map { rand(@points) })
+        @x3 = Daru::Vector.new(@samples.times.map { rand(@points) })
+        @ds = Daru::DataFrame.new({ :a => @x1, :b => @x2, :c => @x3 })
         @icc = Statsample::Reliability::ItemCharacteristicCurve.new(@ds)
       end
       should 'have a correct automatic vector_total' do
         assert_equal(@ds.vector_sum, @icc.vector_total)
       end
       should 'have a correct different vector_total' do
-        x2 = @samples.times.map { rand(10) }.to_numeric
+        x2 = Daru::Vector.new(@samples.times.map { rand(10) })
         @icc = Statsample::Reliability::ItemCharacteristicCurve.new(@ds, x2)
         assert_equal(x2, @icc.vector_total)
         assert_raises(ArgumentError) do
-          inc = (@samples + 10).times.map { rand(10) }.to_numeric
+          inc = Daru::Vector.new((@samples + 10).times.map { rand(10) })
           @icc = Statsample::Reliability::ItemCharacteristicCurve.new(@ds, inc)
         end
       end
       should 'have 0% for 0 points on maximum value values' do
-        max = @icc.curve_field('a', 0)[@max_point.to_f]
+        max = @icc.curve_field(:a, 0)[@max_point.to_f]
         max ||= 0
         assert_in_delta(0, max)
       end
       should 'have 0 for max value on minimum value' do
-        max = @icc.curve_field('a', @max_point)[0.0]
+        max = @icc.curve_field(:a, @max_point)[0.0]
         max ||= 0
         assert_in_delta(0, max)
       end
@@ -107,7 +116,7 @@ class StatsampleReliabilityTestCase < Minitest::Test
         expected = total.each {|k, v|
           total[k] = v.quo(total_g[k])
         }
-        assert_equal(expected, @icc.curve_field('a', index))
+        assert_equal(expected, @icc.curve_field(:a, index))
       end
     end
@@ -119,33 +128,34 @@ class StatsampleReliabilityTestCase < Minitest::Test
         h = {}
         @scales.times {|s|
           @items_per_scale.times {|i|
-            h["#{s}_#{i}"] = (size.times.map { (s * 2) + rand }).to_numeric
+            h["#{s}_#{i}".to_sym] = Daru::Vector.new((size.times.map { (s * 2) + rand }))
           }
         }
-        @ds = h.to_dataset
+        @ds = Daru::DataFrame.new(h)
         @msa = Statsample::Reliability::MultiScaleAnalysis.new(name: 'Multiple Analysis') do |m|
           m.scale 'complete', @ds
           @scales.times {|s|
-            m.scale "scale_#{s}", @ds.clone(@items_per_scale.times.map { |i| "#{s}_#{i}" }), name: "Scale #{s}"
+            m.scale "scale_#{s}", @ds.clone(*@items_per_scale.times.map { |i| "#{s}_#{i}".to_sym }), name: "Scale #{s}"
           }
         end
       end
       should 'Retrieve correct ScaleAnalysis for whole scale' do
         sa = Statsample::Reliability::ScaleAnalysis.new(@ds, name: 'Scale complete')
         assert_equal(sa.variances_mean, @msa.scale('complete').variances_mean)
       end
       should 'Retrieve correct ScaleAnalysis for each scale' do
         @scales.times {|s|
-          sa = Statsample::Reliability::ScaleAnalysis.new(@ds.dup(@items_per_scale.times.map { |i| "#{s}_#{i}" }), name: "Scale #{s}")
+          sa = Statsample::Reliability::ScaleAnalysis.new(@ds.dup(@items_per_scale.times.map { |i| "#{s}_#{i}".to_sym }), name: "Scale #{s}")
           assert_equal(sa.variances_mean, @msa.scale("scale_#{s}").variances_mean)
         }
       end
       should 'retrieve correct correlation matrix for each scale' do
-        vectors = { 'complete' => @ds.vector_sum }
+        vectors = { :complete => @ds.vector_sum }
         @scales.times {|s|
-          vectors["scale_#{s}"] = @ds.dup(@items_per_scale.times.map { |i| "#{s}_#{i}" }).vector_sum
+          vectors["scale_#{s}".to_sym] = @ds.dup(@items_per_scale.times.map { |i| "#{s}_#{i}".to_sym }).vector_sum
         }
-        ds2 = vectors.to_dataset
+        ds2 = Daru::DataFrame.new(vectors)
         assert_equal(Statsample::Bivariate.correlation_matrix(ds2), @msa.correlation_matrix)
       end
       should 'delete scale using delete_scale' do
@@ -156,9 +166,9 @@ class StatsampleReliabilityTestCase < Minitest::Test
         @msa.delete_scale('complete')
         vectors = {}
         @scales.times {|s|
-          vectors["scale_#{s}"] = @ds.dup(@items_per_scale.times.map { |i| "#{s}_#{i}" }).vector_sum
+          vectors["scale_#{s}".to_sym] = @ds.dup(@items_per_scale.times.map { |i| "#{s}_#{i}".to_sym }).vector_sum
         }
-        ds2 = vectors.to_dataset
+        ds2 = Daru::DataFrame.new(vectors)
         cor_matrix = Statsample::Bivariate.correlation_matrix(ds2)
         m = 3
         pca = Statsample::Factor::PCA.new(cor_matrix, m: m)
@@ -177,31 +187,31 @@ class StatsampleReliabilityTestCase < Minitest::Test
     end
     context Statsample::Reliability::ScaleAnalysis do
       setup do
-        @x1 = [1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 30].to_numeric
-        @x2 = [1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 50].to_numeric
-        @x3 = [2, 2, 1, 1, 1, 2, 2, 2, 3, 4, 5, 40].to_numeric
-        @x4 = [1, 2, 3, 4, 4, 4, 4, 3, 4, 4, 5, 30].to_numeric
-        @ds = { 'x1' => @x1, 'x2' => @x2, 'x3' => @x3, 'x4' => @x4 }.to_dataset
+        @x1 = Daru::Vector.new([1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 30])
+        @x2 = Daru::Vector.new([1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 50])
+        @x3 = Daru::Vector.new([2, 2, 1, 1, 1, 2, 2, 2, 3, 4, 5, 40])
+        @x4 = Daru::Vector.new([1, 2, 3, 4, 4, 4, 4, 3, 4, 4, 5, 30])
+        @ds = Daru::DataFrame.new({ :x1 => @x1, :x2 => @x2, :x3 => @x3, :x4 => @x4 })
         @ia = Statsample::Reliability::ScaleAnalysis.new(@ds)
         @cov_matrix = @ia.cov_m
       end
       should 'return correct values for item analysis' do
         assert_in_delta(0.980, @ia.alpha, 0.001)
         assert_in_delta(0.999, @ia.alpha_standarized, 0.001)
-        var_mean = 4.times.map { |m| @cov_matrix[m, m] }.to_numeric.mean
+        var_mean = Daru::Vector.new(4.times.map { |m| @cov_matrix[m, m] }).mean
         assert_in_delta(var_mean, @ia.variances_mean)
-        assert_equal(@x1.mean, @ia.item_statistics['x1'][:mean])
-        assert_equal(@x4.mean, @ia.item_statistics['x4'][:mean])
-        assert_in_delta(@x1.sds, @ia.item_statistics['x1'][:sds], 1e-14)
-        assert_in_delta(@x4.sds, @ia.item_statistics['x4'][:sds], 1e-14)
+        assert_equal(@x1.mean, @ia.item_statistics[:x1][:mean])
+        assert_equal(@x4.mean, @ia.item_statistics[:x4][:mean])
+        assert_in_delta(@x1.sds, @ia.item_statistics[:x1][:sds], 1e-14)
+        assert_in_delta(@x4.sds, @ia.item_statistics[:x4][:sds], 1e-14)
         ds2 = @ds.clone
-        ds2.delete_vector('x1')
+        ds2.delete_vector(:x1)
         vector_sum = ds2.vector_sum
-        assert_equal(vector_sum.mean, @ia.stats_if_deleted['x1'][:mean])
-        assert_equal(vector_sum.sds, @ia.stats_if_deleted['x1'][:sds])
-        assert_in_delta(vector_sum.variance, @ia.stats_if_deleted['x1'][:variance_sample], 1e-10)
+        assert_equal(vector_sum.mean, @ia.stats_if_deleted[:x1][:mean])
+        assert_equal(vector_sum.sds, @ia.stats_if_deleted[:x1][:sds])
+        assert_in_delta(vector_sum.variance, @ia.stats_if_deleted[:x1][:variance_sample], 1e-10)
-        assert_equal(Statsample::Reliability.cronbach_alpha(ds2), @ia.stats_if_deleted['x1'][:alpha])
+        assert_equal(Statsample::Reliability.cronbach_alpha(ds2), @ia.stats_if_deleted[:x1][:alpha])
         covariances = []
         4.times.each {|i|
@@ -211,9 +221,9 @@ class StatsampleReliabilityTestCase < Minitest::Test
             end
           }
         }
-        assert_in_delta(covariances.to_numeric.mean, @ia.covariances_mean)
-        assert_in_delta(0.999, @ia.item_total_correlation['x1'], 0.001)
-        assert_in_delta(1050.455, @ia.stats_if_deleted['x1'][:variance_sample], 0.001)
+        assert_in_delta(Daru::Vector.new(covariances).mean, @ia.covariances_mean)
+        assert_in_delta(0.999, @ia.item_total_correlation[:x1], 0.001)
+        assert_in_delta(1050.455, @ia.stats_if_deleted[:x1][:variance_sample], 0.001)
       end
       should 'return a summary' do
         assert(@ia.summary.size > 0)

data/test/test_reliability_icc.rb CHANGED

@@ -5,11 +5,11 @@ $reliability_icc = nil
 class StatsampleReliabilityIccTestCase < Minitest::Test
   context Statsample::Reliability::ICC do
     setup do
-      a = [9, 6, 8, 7, 10, 6].to_numeric
-      b = [2, 1, 4, 1, 5, 2].to_numeric
-      c = [5, 3, 6, 2, 6, 4].to_numeric
-      d = [8, 2, 8, 6, 9, 7].to_numeric
-      @ds = { 'a' => a, 'b' => b, 'c' => c, 'd' => d }.to_dataset
+      a = Daru::Vector.new([9, 6, 8, 7, 10, 6])
+      b = Daru::Vector.new([2, 1, 4, 1, 5, 2])
+      c = Daru::Vector.new([5, 3, 6, 2, 6, 4])
+      d = Daru::Vector.new([8, 2, 8, 6, 9, 7])
+      @ds = Daru::DataFrame.new({ :a => a, :b => b, :c => c, :d => d })
       @icc = Statsample::Reliability::ICC.new(@ds)
     end
     should 'basic method be correct' do
@@ -114,7 +114,7 @@ class StatsampleReliabilityIccTestCase < Minitest::Test
     begin
       require 'rserve'
-      require 'statsample/rserve_extension'
+      require 'daru/extensions/rserve'
       context 'McGraw and Wong' do
         teardown do
           @r = $reliability_icc[:r].close unless $reliability_icc[:r].nil?
@@ -122,11 +122,11 @@ class StatsampleReliabilityIccTestCase < Minitest::Test
         setup do
           if $reliability_icc.nil?
             size = 100
-            a = size.times.map { rand(10) }.to_numeric
+            a = Daru::Vector.new(size.times.map { rand(10) })
             b = a.recode { |i| i + rand(4) - 2 }
             c = a.recode { |i| i + rand(4) - 2 }
             d = a.recode { |i| i + rand(4) - 2 }
-            @ds = { 'a' => a, 'b' => b, 'c' => c, 'd' => d }.to_dataset
+            @ds = Daru::DataFrame.new({ :a => a, :b => b, :c => c, :d => d })
             @icc = Statsample::Reliability::ICC.new(@ds)
             @r = Rserve::Connection.new