statsample 1.4.1 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +4 -3
- data/History.txt +4 -0
- data/README.md +4 -0
- data/lib/statsample/converter/csv.rb +41 -54
- data/lib/statsample/converters.rb +18 -19
- data/lib/statsample/version.rb +1 -1
- data/test/fixtures/scientific_notation.csv +4 -0
- data/test/helpers_tests.rb +37 -38
- data/test/test_analysis.rb +96 -97
- data/test/test_anova_contrast.rb +22 -22
- data/test/test_anovaoneway.rb +12 -12
- data/test/test_anovatwoway.rb +16 -17
- data/test/test_anovatwowaywithdataset.rb +22 -24
- data/test/test_anovawithvectors.rb +67 -69
- data/test/test_awesome_print_bug.rb +9 -9
- data/test/test_bartlettsphericity.rb +13 -13
- data/test/test_bivariate.rb +122 -126
- data/test/test_codification.rb +51 -49
- data/test/test_crosstab.rb +44 -40
- data/test/test_csv.rb +52 -70
- data/test/test_dataset.rb +347 -330
- data/test/test_dominance_analysis.rb +22 -24
- data/test/test_factor.rb +163 -166
- data/test/test_factor_map.rb +25 -30
- data/test/test_factor_pa.rb +28 -28
- data/test/test_ggobi.rb +19 -18
- data/test/test_gsl.rb +13 -15
- data/test/test_histogram.rb +74 -77
- data/test/test_matrix.rb +29 -31
- data/test/test_multiset.rb +132 -126
- data/test/test_regression.rb +143 -149
- data/test/test_reliability.rb +149 -155
- data/test/test_reliability_icc.rb +100 -104
- data/test/test_reliability_skillscale.rb +38 -40
- data/test/test_resample.rb +14 -12
- data/test/test_rserve_extension.rb +33 -33
- data/test/test_srs.rb +5 -5
- data/test/test_statistics.rb +52 -50
- data/test/test_stest.rb +27 -28
- data/test/test_stratified.rb +10 -10
- data/test/test_test_f.rb +17 -17
- data/test/test_test_kolmogorovsmirnov.rb +21 -21
- data/test/test_test_t.rb +52 -52
- data/test/test_umannwhitney.rb +16 -16
- data/test/test_vector.rb +419 -410
- data/test/test_wilcoxonsignedrank.rb +60 -63
- data/test/test_xls.rb +41 -41
- metadata +55 -5
- data/web/Rakefile +0 -39
    
        data/test/test_bivariate.rb
    CHANGED
    
    | @@ -1,163 +1,159 @@ | |
| 1 | 
            -
            require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
         | 
| 2 | 
            -
            class StatsampleBivariateTestCase <  | 
| 3 | 
            -
              should  | 
| 4 | 
            -
                v1=[1,2,3,4,5,6].to_vector(:scale)
         | 
| 5 | 
            -
                v2=[6,2,4,10,12,8].to_vector(:scale)
         | 
| 6 | 
            -
                assert_equal(23.0, Statsample::Bivariate.sum_of_squares(v1,v2))
         | 
| 1 | 
            +
            require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
         | 
| 2 | 
            +
            class StatsampleBivariateTestCase < Minitest::Test
         | 
| 3 | 
            +
              should 'method sum of squares should be correct' do
         | 
| 4 | 
            +
                v1 = [1, 2, 3, 4, 5, 6].to_vector(:scale)
         | 
| 5 | 
            +
                v2 = [6, 2, 4, 10, 12, 8].to_vector(:scale)
         | 
| 6 | 
            +
                assert_equal(23.0, Statsample::Bivariate.sum_of_squares(v1, v2))
         | 
| 7 7 | 
             
              end
         | 
| 8 | 
            -
              should_with_gsl  | 
| 9 | 
            -
                v1=20.times.collect {| | 
| 10 | 
            -
                v2=20.times.collect {| | 
| 11 | 
            -
                assert_in_delta(Statsample::Bivariate.covariance(v1,v2), Statsample::Bivariate.covariance_slow(v1,v2), 0.001)
         | 
| 8 | 
            +
              should_with_gsl 'return same covariance with ruby and gls implementation' do
         | 
| 9 | 
            +
                v1 = 20.times.collect { |_a| rand }.to_scale
         | 
| 10 | 
            +
                v2 = 20.times.collect { |_a| rand }.to_scale
         | 
| 11 | 
            +
                assert_in_delta(Statsample::Bivariate.covariance(v1, v2), Statsample::Bivariate.covariance_slow(v1, v2), 0.001)
         | 
| 12 12 | 
             
              end
         | 
| 13 13 |  | 
| 14 | 
            -
              should_with_gsl  | 
| 15 | 
            -
                v1=20.times.collect {| | 
| 16 | 
            -
                v2=20.times.collect {| | 
| 14 | 
            +
              should_with_gsl 'return same correlation with ruby and gls implementation' do
         | 
| 15 | 
            +
                v1 = 20.times.collect { |_a| rand }.to_scale
         | 
| 16 | 
            +
                v2 = 20.times.collect { |_a| rand }.to_scale
         | 
| 17 17 |  | 
| 18 | 
            -
                assert_in_delta(GSL::Stats | 
| 18 | 
            +
                assert_in_delta(GSL::Stats.correlation(v1.gsl, v2.gsl), Statsample::Bivariate.pearson_slow(v1, v2), 1e-10)
         | 
| 19 19 | 
             
              end
         | 
| 20 | 
            -
              should  | 
| 21 | 
            -
                v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
         | 
| 22 | 
            -
                v2=[2,3,7,8,6,4,3,2].to_vector(:scale)
         | 
| 23 | 
            -
                assert_in_delta(0.525,Statsample::Bivariate.pearson(v1,v2), 0.001)
         | 
| 24 | 
            -
                assert_in_delta(0.525,Statsample::Bivariate.pearson_slow(v1,v2), 0.001)
         | 
| 20 | 
            +
              should 'return correct pearson correlation' do
         | 
| 21 | 
            +
                v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:scale)
         | 
| 22 | 
            +
                v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:scale)
         | 
| 23 | 
            +
                assert_in_delta(0.525, Statsample::Bivariate.pearson(v1, v2), 0.001)
         | 
| 24 | 
            +
                assert_in_delta(0.525, Statsample::Bivariate.pearson_slow(v1, v2), 0.001)
         | 
| 25 25 |  | 
| 26 | 
            -
                v3=[6,2,  1000,1000,5,4,7,8,4,3,2,nil].to_vector(:scale)
         | 
| 27 | 
            -
                v4=[2,nil,nil,nil,  3,7,8,6,4,3,2,500].to_vector(:scale)
         | 
| 28 | 
            -
                assert_in_delta(0.525,Statsample::Bivariate.pearson(v3,v4),0.001)
         | 
| 26 | 
            +
                v3 = [6, 2,  1000, 1000, 5, 4, 7, 8, 4, 3, 2, nil].to_vector(:scale)
         | 
| 27 | 
            +
                v4 = [2, nil, nil, nil,  3, 7, 8, 6, 4, 3, 2, 500].to_vector(:scale)
         | 
| 28 | 
            +
                assert_in_delta(0.525, Statsample::Bivariate.pearson(v3, v4), 0.001)
         | 
| 29 29 | 
             
                # Test ruby method
         | 
| 30 | 
            -
                v3a,v4a=Statsample.only_valid v3, v4
         | 
| 31 | 
            -
                assert_in_delta(0.525, Statsample::Bivariate.pearson_slow(v3a,v4a),0.001)
         | 
| 30 | 
            +
                v3a, v4a = Statsample.only_valid v3, v4
         | 
| 31 | 
            +
                assert_in_delta(0.525, Statsample::Bivariate.pearson_slow(v3a, v4a), 0.001)
         | 
| 32 32 | 
             
              end
         | 
| 33 | 
            -
              should  | 
| 34 | 
            -
                v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
         | 
| 35 | 
            -
                v2=[2,3,7,8,6,4,3,2].to_vector(:scale)
         | 
| 36 | 
            -
                r=Statsample::Bivariate::Pearson.new(v1,v2)
         | 
| 37 | 
            -
                assert_in_delta(0.525,r.r, 0.001)
         | 
| 38 | 
            -
                assert_in_delta(Statsample::Bivariate.t_pearson(v1,v2), r.t, 0.001)
         | 
| 39 | 
            -
                assert_in_delta(Statsample::Bivariate.prop_pearson(r.t,8 | 
| 40 | 
            -
                assert(r.summary.size>0)
         | 
| 33 | 
            +
              should 'return correct values for t_pearson and prop_pearson' do
         | 
| 34 | 
            +
                v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:scale)
         | 
| 35 | 
            +
                v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:scale)
         | 
| 36 | 
            +
                r = Statsample::Bivariate::Pearson.new(v1, v2)
         | 
| 37 | 
            +
                assert_in_delta(0.525, r.r, 0.001)
         | 
| 38 | 
            +
                assert_in_delta(Statsample::Bivariate.t_pearson(v1, v2), r.t, 0.001)
         | 
| 39 | 
            +
                assert_in_delta(Statsample::Bivariate.prop_pearson(r.t, 8, :both), r.probability, 0.001)
         | 
| 40 | 
            +
                assert(r.summary.size > 0)
         | 
| 41 41 | 
             
              end
         | 
| 42 | 
            -
              should  | 
| 43 | 
            -
                v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
         | 
| 44 | 
            -
                v2=[2,3,7,8,6,4,3,2].to_vector(:scale)
         | 
| 45 | 
            -
                v3=[6,2,  1000,1000,5,4,7,8].to_vector(:scale)
         | 
| 46 | 
            -
                v4=[2,nil,nil,nil,  3,7,8,6].to_vector(:scale)
         | 
| 47 | 
            -
                ds={'v1'=>v1,'v2'=>v2,'v3'=>v3,'v4'=>v4}.to_dataset
         | 
| 48 | 
            -
                c= | 
| 49 | 
            -
                expected=Matrix[ | 
| 50 | 
            -
             | 
| 42 | 
            +
              should 'return correct correlation_matrix with nils values' do
         | 
| 43 | 
            +
                v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:scale)
         | 
| 44 | 
            +
                v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:scale)
         | 
| 45 | 
            +
                v3 = [6, 2,  1000, 1000, 5, 4, 7, 8].to_vector(:scale)
         | 
| 46 | 
            +
                v4 = [2, nil, nil, nil,  3, 7, 8, 6].to_vector(:scale)
         | 
| 47 | 
            +
                ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4 }.to_dataset
         | 
| 48 | 
            +
                c = proc { |n1, n2| Statsample::Bivariate.pearson(n1, n2) }
         | 
| 49 | 
            +
                expected = Matrix[[c.call(v1, v1), c.call(v1, v2), c.call(v1, v3), c.call(v1, v4)], [c.call(v2, v1), c.call(v2, v2), c.call(v2, v3), c.call(v2, v4)], [c.call(v3, v1), c.call(v3, v2), c.call(v3, v3), c.call(v3, v4)],
         | 
| 50 | 
            +
                                  [c.call(v4, v1), c.call(v4, v2), c.call(v4, v3), c.call(v4, v4)]
         | 
| 51 51 | 
             
                ]
         | 
| 52 | 
            -
                obt=Statsample::Bivariate.correlation_matrix(ds)
         | 
| 52 | 
            +
                obt = Statsample::Bivariate.correlation_matrix(ds)
         | 
| 53 53 | 
             
                for i in 0...expected.row_size
         | 
| 54 54 | 
             
                  for j in 0...expected.column_size
         | 
| 55 | 
            -
                    #puts expected[i,j].inspect
         | 
| 56 | 
            -
                    #puts obt[i,j].inspect
         | 
| 57 | 
            -
                    assert_in_delta(expected[i,j], obt[i,j],0.0001, "#{expected[i,j].class}!=#{obt[i,j].class}  ")
         | 
| 55 | 
            +
                    # puts expected[i,j].inspect
         | 
| 56 | 
            +
                    # puts obt[i,j].inspect
         | 
| 57 | 
            +
                    assert_in_delta(expected[i, j], obt[i, j], 0.0001, "#{expected[i, j].class}!=#{obt[i, j].class}  ")
         | 
| 58 58 | 
             
                  end
         | 
| 59 59 | 
             
                end
         | 
| 60 | 
            -
                #assert_equal(expected,obt)
         | 
| 60 | 
            +
                # assert_equal(expected,obt)
         | 
| 61 61 | 
             
              end
         | 
| 62 | 
            -
              should_with_gsl  | 
| 63 | 
            -
             | 
| 64 | 
            -
             | 
| 65 | 
            -
             | 
| 66 | 
            -
             | 
| 67 | 
            -
             | 
| 68 | 
            -
             | 
| 62 | 
            +
              should_with_gsl 'return same values for optimized and pairwise covariance matrix' do
         | 
| 63 | 
            +
                cases = 100
         | 
| 64 | 
            +
                v1 = Statsample::Vector.new_scale(cases) { rand }
         | 
| 65 | 
            +
                v2 = Statsample::Vector.new_scale(cases) { rand }
         | 
| 66 | 
            +
                v3 = Statsample::Vector.new_scale(cases) { rand }
         | 
| 67 | 
            +
                v4 = Statsample::Vector.new_scale(cases) { rand }
         | 
| 68 | 
            +
                v5 = Statsample::Vector.new_scale(cases) { rand }
         | 
| 69 69 |  | 
| 70 | 
            -
             | 
| 71 | 
            -
             | 
| 72 | 
            -
             | 
| 73 | 
            -
             | 
| 74 | 
            -
             | 
| 75 | 
            -
             | 
| 70 | 
            +
                ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4, 'v5' => v5 }.to_dataset
         | 
| 71 | 
            +
             | 
| 72 | 
            +
                cor_opt = Statsample::Bivariate.covariance_matrix_optimized(ds)
         | 
| 73 | 
            +
             | 
| 74 | 
            +
                cor_pw = Statsample::Bivariate.covariance_matrix_pairwise(ds)
         | 
| 75 | 
            +
                assert_equal_matrix(cor_opt, cor_pw, 1e-15)
         | 
| 76 76 | 
             
              end
         | 
| 77 | 
            -
              should_with_gsl  | 
| 78 | 
            -
                
         | 
| 79 | 
            -
                cases | 
| 80 | 
            -
                 | 
| 81 | 
            -
                 | 
| 82 | 
            -
                 | 
| 83 | 
            -
                 | 
| 84 | 
            -
             | 
| 77 | 
            +
              should_with_gsl 'return same values for optimized and pairwise correlation matrix' do
         | 
| 78 | 
            +
                cases = 100
         | 
| 79 | 
            +
                v1 = Statsample::Vector.new_scale(cases) { rand }
         | 
| 80 | 
            +
                v2 = Statsample::Vector.new_scale(cases) { rand }
         | 
| 81 | 
            +
                v3 = Statsample::Vector.new_scale(cases) { rand }
         | 
| 82 | 
            +
                v4 = Statsample::Vector.new_scale(cases) { rand }
         | 
| 83 | 
            +
                v5 = Statsample::Vector.new_scale(cases) { rand }
         | 
| 84 | 
            +
             | 
| 85 | 
            +
                ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4, 'v5' => v5 }.to_dataset
         | 
| 85 86 |  | 
| 86 | 
            -
                 | 
| 87 | 
            -
             | 
| 88 | 
            -
                 | 
| 89 | 
            -
                
         | 
| 90 | 
            -
                cor_pw =Statsample::Bivariate.correlation_matrix_pairwise(ds)
         | 
| 91 | 
            -
                assert_equal_matrix(cor_opt,cor_pw,1e-15)
         | 
| 92 | 
            -
                
         | 
| 87 | 
            +
                cor_opt = Statsample::Bivariate.correlation_matrix_optimized(ds)
         | 
| 88 | 
            +
             | 
| 89 | 
            +
                cor_pw = Statsample::Bivariate.correlation_matrix_pairwise(ds)
         | 
| 90 | 
            +
                assert_equal_matrix(cor_opt, cor_pw, 1e-15)
         | 
| 93 91 | 
             
              end
         | 
| 94 | 
            -
              should  | 
| 95 | 
            -
                v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
         | 
| 96 | 
            -
                v2=[2,3,7,8,6,4,3,2].to_vector(:scale)
         | 
| 97 | 
            -
                v3=[6,2,  1000,1000,5,4,7,8].to_vector(:scale)
         | 
| 98 | 
            -
                v4=[2,4,6,7,  3,7,8,6].to_vector(:scale)
         | 
| 99 | 
            -
                ds={'v1'=>v1,'v2'=>v2,'v3'=>v3,'v4'=>v4}.to_dataset
         | 
| 100 | 
            -
                c= | 
| 101 | 
            -
                expected=Matrix[ | 
| 102 | 
            -
             | 
| 92 | 
            +
              should 'return correct correlation_matrix without nils values' do
         | 
| 93 | 
            +
                v1 = [6, 5, 4, 7, 8, 4, 3, 2].to_vector(:scale)
         | 
| 94 | 
            +
                v2 = [2, 3, 7, 8, 6, 4, 3, 2].to_vector(:scale)
         | 
| 95 | 
            +
                v3 = [6, 2,  1000, 1000, 5, 4, 7, 8].to_vector(:scale)
         | 
| 96 | 
            +
                v4 = [2, 4, 6, 7,  3, 7, 8, 6].to_vector(:scale)
         | 
| 97 | 
            +
                ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3, 'v4' => v4 }.to_dataset
         | 
| 98 | 
            +
                c = proc { |n1, n2| Statsample::Bivariate.pearson(n1, n2) }
         | 
| 99 | 
            +
                expected = Matrix[[c.call(v1, v1), c.call(v1, v2), c.call(v1, v3), c.call(v1, v4)], [c.call(v2, v1), c.call(v2, v2), c.call(v2, v3), c.call(v2, v4)], [c.call(v3, v1), c.call(v3, v2), c.call(v3, v3), c.call(v3, v4)],
         | 
| 100 | 
            +
                                  [c.call(v4, v1), c.call(v4, v2), c.call(v4, v3), c.call(v4, v4)]
         | 
| 103 101 | 
             
                ]
         | 
| 104 | 
            -
                obt=Statsample::Bivariate.correlation_matrix(ds)
         | 
| 102 | 
            +
                obt = Statsample::Bivariate.correlation_matrix(ds)
         | 
| 105 103 | 
             
                for i in 0...expected.row_size
         | 
| 106 104 | 
             
                  for j in 0...expected.column_size
         | 
| 107 | 
            -
                    #puts expected[i,j].inspect
         | 
| 108 | 
            -
                    #puts obt[i,j].inspect
         | 
| 109 | 
            -
                    assert_in_delta(expected[i,j], obt[i,j],0.0001, "#{expected[i,j].class}!=#{obt[i,j].class}  ")
         | 
| 105 | 
            +
                    # puts expected[i,j].inspect
         | 
| 106 | 
            +
                    # puts obt[i,j].inspect
         | 
| 107 | 
            +
                    assert_in_delta(expected[i, j], obt[i, j], 0.0001, "#{expected[i, j].class}!=#{obt[i, j].class}  ")
         | 
| 110 108 | 
             
                  end
         | 
| 111 109 | 
             
                end
         | 
| 112 | 
            -
                #assert_equal(expected,obt)
         | 
| 110 | 
            +
                # assert_equal(expected,obt)
         | 
| 113 111 | 
             
              end
         | 
| 114 112 |  | 
| 115 | 
            -
              
         | 
| 116 | 
            -
             | 
| 117 | 
            -
                assert_in_delta(0. | 
| 118 | 
            -
                 | 
| 119 | 
            -
                 | 
| 120 | 
            -
                n | 
| 121 | 
            -
                 | 
| 122 | 
            -
                assert(Statsample::Bivariate.prop_pearson(t,n | 
| 123 | 
            -
                assert(Statsample::Bivariate.prop_pearson(t,n | 
| 124 | 
            -
                assert(Statsample::Bivariate.prop_pearson(t,n,:left)>0.05)
         | 
| 113 | 
            +
              should 'return correct value for prop pearson' do
         | 
| 114 | 
            +
                assert_in_delta(0.42, Statsample::Bivariate.prop_pearson(Statsample::Bivariate.t_r(0.084, 94), 94), 0.01)
         | 
| 115 | 
            +
                assert_in_delta(0.65, Statsample::Bivariate.prop_pearson(Statsample::Bivariate.t_r(0.046, 95), 95), 0.01)
         | 
| 116 | 
            +
                r = 0.9
         | 
| 117 | 
            +
                n = 100
         | 
| 118 | 
            +
                t = Statsample::Bivariate.t_r(r, n)
         | 
| 119 | 
            +
                assert(Statsample::Bivariate.prop_pearson(t, n, :both) < 0.05)
         | 
| 120 | 
            +
                assert(Statsample::Bivariate.prop_pearson(t, n, :right) < 0.05)
         | 
| 121 | 
            +
                assert(Statsample::Bivariate.prop_pearson(t, n, :left) > 0.05)
         | 
| 125 122 |  | 
| 126 | 
            -
                r | 
| 127 | 
            -
                n=100
         | 
| 128 | 
            -
                t=Statsample::Bivariate.t_r(r,n)
         | 
| 129 | 
            -
                assert(Statsample::Bivariate.prop_pearson(t,n | 
| 130 | 
            -
                assert(Statsample::Bivariate.prop_pearson(t,n | 
| 131 | 
            -
                assert(Statsample::Bivariate.prop_pearson(t,n | 
| 123 | 
            +
                r = -0.9
         | 
| 124 | 
            +
                n = 100
         | 
| 125 | 
            +
                t = Statsample::Bivariate.t_r(r, n)
         | 
| 126 | 
            +
                assert(Statsample::Bivariate.prop_pearson(t, n, :both) < 0.05)
         | 
| 127 | 
            +
                assert(Statsample::Bivariate.prop_pearson(t, n, :right) > 0.05)
         | 
| 128 | 
            +
                assert(Statsample::Bivariate.prop_pearson(t, n, :left) < 0.05)
         | 
| 132 129 | 
             
              end
         | 
| 133 130 |  | 
| 134 131 | 
             
              should "return correct value for Spearman's rho" do
         | 
| 135 | 
            -
                v1=[86,97,99,100,101,103,106,110,112,113].to_vector(:scale)
         | 
| 136 | 
            -
                v2=[0,20,28,27,50,29,7,17,6,12].to_vector(:scale)
         | 
| 137 | 
            -
                assert_in_delta(-0.175758,Statsample::Bivariate.spearman(v1,v2),0.0001)
         | 
| 138 | 
            -
             | 
| 132 | 
            +
                v1 = [86, 97, 99, 100, 101, 103, 106, 110, 112, 113].to_vector(:scale)
         | 
| 133 | 
            +
                v2 = [0, 20, 28, 27, 50, 29, 7, 17, 6, 12].to_vector(:scale)
         | 
| 134 | 
            +
                assert_in_delta(-0.175758, Statsample::Bivariate.spearman(v1, v2), 0.0001)
         | 
| 139 135 | 
             
              end
         | 
| 140 | 
            -
              should  | 
| 141 | 
            -
                c=[1,3,5,6,7,100,200,300,400,300].to_vector(:scale)
         | 
| 142 | 
            -
                d=[1,1,1,1,1,0,0,0,0,0].to_vector(:scale)
         | 
| 136 | 
            +
              should 'return correct value for point_biserial correlation' do
         | 
| 137 | 
            +
                c = [1, 3, 5, 6, 7, 100, 200, 300, 400, 300].to_vector(:scale)
         | 
| 138 | 
            +
                d = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0].to_vector(:scale)
         | 
| 143 139 | 
             
                assert_raises TypeError do
         | 
| 144 | 
            -
                  Statsample::Bivariate.point_biserial(c,d)
         | 
| 140 | 
            +
                  Statsample::Bivariate.point_biserial(c, d)
         | 
| 145 141 | 
             
                end
         | 
| 146 | 
            -
                assert_in_delta(Statsample::Bivariate.point_biserial(d,c), Statsample::Bivariate.pearson(d,c), 0.0001)
         | 
| 142 | 
            +
                assert_in_delta(Statsample::Bivariate.point_biserial(d, c), Statsample::Bivariate.pearson(d, c), 0.0001)
         | 
| 147 143 | 
             
              end
         | 
| 148 | 
            -
              should  | 
| 149 | 
            -
                v1=[1,2,3,4,5,6,7,8,9,10,11].to_vector(:ordinal)
         | 
| 150 | 
            -
                v2=[1,3,4,5,7,8,2,9,10,6,11].to_vector(:ordinal)
         | 
| 151 | 
            -
                assert_in_delta(0.6727,Statsample::Bivariate.tau_a(v1,v2),0.001)
         | 
| 152 | 
            -
                assert_in_delta(0.6727,Statsample::Bivariate.tau_b((Statsample::Crosstab.new(v1,v2).to_matrix)),0.001)
         | 
| 153 | 
            -
                v1=[12,14,14,17,19,19,19,19,19,20,21,21,21,21,21,22,23,24,24,24,26,26,27].to_vector(:ordinal)
         | 
| 154 | 
            -
                v2=[11,4,4,2,0,0,0,0,0,0,4,0,4,0,0,0,0,4,0,0,0,0,0].to_vector(:ordinal)
         | 
| 155 | 
            -
                assert_in_delta(-0.376201540231705, Statsample::Bivariate.tau_b(Statsample::Crosstab.new(v1,v2).to_matrix),0.001)
         | 
| 144 | 
            +
              should 'return correct value for tau_a and tau_b' do
         | 
| 145 | 
            +
                v1 = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11].to_vector(:ordinal)
         | 
| 146 | 
            +
                v2 = [1, 3, 4, 5, 7, 8, 2, 9, 10, 6, 11].to_vector(:ordinal)
         | 
| 147 | 
            +
                assert_in_delta(0.6727, Statsample::Bivariate.tau_a(v1, v2), 0.001)
         | 
| 148 | 
            +
                assert_in_delta(0.6727, Statsample::Bivariate.tau_b((Statsample::Crosstab.new(v1, v2).to_matrix)), 0.001)
         | 
| 149 | 
            +
                v1 = [12, 14, 14, 17, 19, 19, 19, 19, 19, 20, 21, 21, 21, 21, 21, 22, 23, 24, 24, 24, 26, 26, 27].to_vector(:ordinal)
         | 
| 150 | 
            +
                v2 = [11, 4, 4, 2, 0, 0, 0, 0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0].to_vector(:ordinal)
         | 
| 151 | 
            +
                assert_in_delta(-0.376201540231705, Statsample::Bivariate.tau_b(Statsample::Crosstab.new(v1, v2).to_matrix), 0.001)
         | 
| 156 152 | 
             
              end
         | 
| 157 | 
            -
              should  | 
| 158 | 
            -
                m=Matrix[[10,5,2],[10,15,20]]
         | 
| 159 | 
            -
                assert_in_delta(0.636,Statsample::Bivariate.gamma(m),0.001)
         | 
| 160 | 
            -
                m2=Matrix[[15,12,6,5],[12,8,10,8],[4,6,9,10]]
         | 
| 161 | 
            -
                assert_in_delta(0.349,Statsample::Bivariate.gamma(m2),0.001)
         | 
| 153 | 
            +
              should 'return correct value for gamma correlation' do
         | 
| 154 | 
            +
                m = Matrix[[10, 5, 2], [10, 15, 20]]
         | 
| 155 | 
            +
                assert_in_delta(0.636, Statsample::Bivariate.gamma(m), 0.001)
         | 
| 156 | 
            +
                m2 = Matrix[[15, 12, 6, 5], [12, 8, 10, 8], [4, 6, 9, 10]]
         | 
| 157 | 
            +
                assert_in_delta(0.349, Statsample::Bivariate.gamma(m2), 0.001)
         | 
| 162 158 | 
             
              end
         | 
| 163 159 | 
             
            end
         | 
    
        data/test/test_codification.rb
    CHANGED
    
    | @@ -1,76 +1,78 @@ | |
| 1 | 
            -
            require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
         | 
| 2 | 
            -
            class StatsampleCodificationTestCase <  | 
| 3 | 
            -
             | 
| 1 | 
            +
            require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
         | 
| 2 | 
            +
            class StatsampleCodificationTestCase < Minitest::Test
         | 
| 4 3 | 
             
              def initialize(*args)
         | 
| 5 | 
            -
                v1 | 
| 6 | 
            -
                @dict={'run'=>'r','walk'=>'w','walking'=>'w','running'=>'r','sleep'=>'s', 'sleeping'=>'s', 'dream'=>'d', 'dreaming'=>'d'}
         | 
| 7 | 
            -
                @ds={ | 
| 4 | 
            +
                v1 = %w(run walk,run walking running sleep sleeping,dreaming sleep,dream).to_vector
         | 
| 5 | 
            +
                @dict = { 'run' => 'r', 'walk' => 'w', 'walking' => 'w', 'running' => 'r', 'sleep' => 's', 'sleeping' => 's', 'dream' => 'd', 'dreaming' => 'd' }
         | 
| 6 | 
            +
                @ds = { 'v1' => v1 }.to_dataset
         | 
| 8 7 | 
             
                super
         | 
| 9 8 | 
             
              end
         | 
| 9 | 
            +
             | 
| 10 10 | 
             
              def test_create_hash
         | 
| 11 | 
            -
                expected_keys_v1 | 
| 12 | 
            -
                hash=Statsample::Codification.create_hash(@ds,['v1'])
         | 
| 13 | 
            -
                assert_equal(['v1'],hash.keys)
         | 
| 14 | 
            -
                assert_equal(expected_keys_v1,hash['v1'].keys.sort)
         | 
| 15 | 
            -
                assert_equal(expected_keys_v1,hash['v1'].values.sort)
         | 
| 11 | 
            +
                expected_keys_v1 = %w(run walk walking running sleep sleeping dream dreaming).sort
         | 
| 12 | 
            +
                hash = Statsample::Codification.create_hash(@ds, ['v1'])
         | 
| 13 | 
            +
                assert_equal(['v1'], hash.keys)
         | 
| 14 | 
            +
                assert_equal(expected_keys_v1, hash['v1'].keys.sort)
         | 
| 15 | 
            +
                assert_equal(expected_keys_v1, hash['v1'].values.sort)
         | 
| 16 16 | 
             
              end
         | 
| 17 | 
            +
             | 
| 17 18 | 
             
              def test_create_excel
         | 
| 18 | 
            -
                filename=Dir | 
| 19 | 
            -
                #filename = Tempfile.new("test_codification_"+Time.now().to_s)
         | 
| 19 | 
            +
                filename = Dir.tmpdir + '/test_excel' + Time.now.to_s + '.xls'
         | 
| 20 | 
            +
                # filename = Tempfile.new("test_codification_"+Time.now().to_s)
         | 
| 20 21 | 
             
                Statsample::Codification.create_excel(@ds, ['v1'], filename)
         | 
| 21 | 
            -
                field=([ | 
| 22 | 
            -
                keys | 
| 23 | 
            -
                ds=Statsample::Excel.read(filename)
         | 
| 22 | 
            +
                field = (['v1'] * 8).to_vector
         | 
| 23 | 
            +
                keys = %w(dream dreaming run running sleep sleeping walk walking).to_vector
         | 
| 24 | 
            +
                ds = Statsample::Excel.read(filename)
         | 
| 24 25 | 
             
                assert_equal(field, ds['field'])
         | 
| 25 26 | 
             
                assert_equal(keys, ds['original'])
         | 
| 26 27 | 
             
                assert_equal(keys, ds['recoded'])
         | 
| 27 | 
            -
                hash=Statsample::Codification.excel_to_recoded_hash(filename)
         | 
| 28 | 
            +
                hash = Statsample::Codification.excel_to_recoded_hash(filename)
         | 
| 28 29 | 
             
                assert_equal(keys.data, hash['v1'].keys.sort)
         | 
| 29 30 | 
             
                assert_equal(keys.data, hash['v1'].values.sort)
         | 
| 30 | 
            -
             | 
| 31 31 | 
             
              end
         | 
| 32 | 
            +
             | 
| 32 33 | 
             
              def test_create_yaml
         | 
| 33 | 
            -
                assert_raise | 
| 34 | 
            -
                  Statsample::Codification.create_yaml(@ds,[])
         | 
| 34 | 
            +
                assert_raise ArgumentError do
         | 
| 35 | 
            +
                  Statsample::Codification.create_yaml(@ds, [])
         | 
| 35 36 | 
             
                end
         | 
| 36 | 
            -
                expected_keys_v1 | 
| 37 | 
            -
                yaml_hash=Statsample::Codification.create_yaml(@ds,['v1'])
         | 
| 38 | 
            -
                h=YAML | 
| 39 | 
            -
                assert_equal(['v1'],h.keys)
         | 
| 40 | 
            -
                assert_equal(expected_keys_v1,h['v1'].keys.sort)
         | 
| 41 | 
            -
                tf = Tempfile.new( | 
| 42 | 
            -
                yaml_hash=Statsample::Codification.create_yaml(@ds,['v1'],tf, Statsample::SPLIT_TOKEN)
         | 
| 37 | 
            +
                expected_keys_v1 = %w(run walk walking running sleep sleeping dream dreaming).sort
         | 
| 38 | 
            +
                yaml_hash = Statsample::Codification.create_yaml(@ds, ['v1'])
         | 
| 39 | 
            +
                h = YAML.load(yaml_hash)
         | 
| 40 | 
            +
                assert_equal(['v1'], h.keys)
         | 
| 41 | 
            +
                assert_equal(expected_keys_v1, h['v1'].keys.sort)
         | 
| 42 | 
            +
                tf = Tempfile.new('test_codification')
         | 
| 43 | 
            +
                yaml_hash = Statsample::Codification.create_yaml(@ds, ['v1'], tf, Statsample::SPLIT_TOKEN)
         | 
| 43 44 | 
             
                tf.close
         | 
| 44 45 | 
             
                tf.open
         | 
| 45 | 
            -
                h=YAML | 
| 46 | 
            -
                assert_equal(['v1'],h.keys)
         | 
| 47 | 
            -
                assert_equal(expected_keys_v1,h['v1'].keys.sort)
         | 
| 46 | 
            +
                h = YAML.load(tf)
         | 
| 47 | 
            +
                assert_equal(['v1'], h.keys)
         | 
| 48 | 
            +
                assert_equal(expected_keys_v1, h['v1'].keys.sort)
         | 
| 48 49 | 
             
                tf.close(true)
         | 
| 49 50 | 
             
              end
         | 
| 51 | 
            +
             | 
| 50 52 | 
             
              def test_recodification
         | 
| 51 | 
            -
                expected=[['r'], | 
| 52 | 
            -
                assert_equal(expected,Statsample::Codification.recode_vector(@ds['v1'] | 
| 53 | 
            -
                v2=['run','walk,dreaming',nil,'walk,dream,dreaming,walking'].to_vector
         | 
| 54 | 
            -
                expected=[['r'], | 
| 55 | 
            -
                assert_equal(expected,Statsample::Codification.recode_vector(v2 | 
| 53 | 
            +
                expected = [['r'], %w(w r), ['w'], ['r'], ['s'], %w(s d), %w(s d)]
         | 
| 54 | 
            +
                assert_equal(expected, Statsample::Codification.recode_vector(@ds['v1'], @dict))
         | 
| 55 | 
            +
                v2 = ['run', 'walk,dreaming', nil, 'walk,dream,dreaming,walking'].to_vector
         | 
| 56 | 
            +
                expected = [['r'], %w(w d), nil, %w(w d)]
         | 
| 57 | 
            +
                assert_equal(expected, Statsample::Codification.recode_vector(v2, @dict))
         | 
| 56 58 | 
             
              end
         | 
| 59 | 
            +
             | 
| 57 60 | 
             
              def test_recode_dataset_simple
         | 
| 58 | 
            -
                Statsample::Codification.recode_dataset_simple!(@ds, | 
| 59 | 
            -
                expected_vector=['r','w,r','w','r','s','s,d', 's,d'].to_vector
         | 
| 60 | 
            -
                assert_not_equal(expected_vector | 
| 61 | 
            -
                assert_equal(expected_vector | 
| 61 | 
            +
                Statsample::Codification.recode_dataset_simple!(@ds, 'v1' => @dict)
         | 
| 62 | 
            +
                expected_vector = ['r', 'w,r', 'w', 'r', 's', 's,d', 's,d'].to_vector
         | 
| 63 | 
            +
                assert_not_equal(expected_vector, @ds['v1'])
         | 
| 64 | 
            +
                assert_equal(expected_vector, @ds['v1_recoded'])
         | 
| 62 65 | 
             
              end
         | 
| 63 | 
            -
              def test_recode_dataset_split
         | 
| 64 | 
            -
                Statsample::Codification.recode_dataset_split!(@ds,{'v1'=>@dict})
         | 
| 65 | 
            -
                e={}
         | 
| 66 | 
            -
                e['r']=[1,1,0,1,0,0,0].to_vector
         | 
| 67 | 
            -
                e['w']=[0,1,1,0,0,0,0].to_vector
         | 
| 68 | 
            -
                e['s']=[0,0,0,0,1,1,1].to_vector
         | 
| 69 | 
            -
                e['d']=[0,0,0,0,0,1,1].to_vector
         | 
| 70 | 
            -
                e.each{|k,expected|
         | 
| 71 | 
            -
                  assert_equal(expected,@ds['v1_'+k],"Error on key #{k}")
         | 
| 72 66 |  | 
| 67 | 
            +
              def test_recode_dataset_split
         | 
| 68 | 
            +
                Statsample::Codification.recode_dataset_split!(@ds, 'v1' => @dict)
         | 
| 69 | 
            +
                e = {}
         | 
| 70 | 
            +
                e['r'] = [1, 1, 0, 1, 0, 0, 0].to_vector
         | 
| 71 | 
            +
                e['w'] = [0, 1, 1, 0, 0, 0, 0].to_vector
         | 
| 72 | 
            +
                e['s'] = [0, 0, 0, 0, 1, 1, 1].to_vector
         | 
| 73 | 
            +
                e['d'] = [0, 0, 0, 0, 0, 1, 1].to_vector
         | 
| 74 | 
            +
                e.each{|k, expected|
         | 
| 75 | 
            +
                  assert_equal(expected, @ds['v1_' + k], "Error on key #{k}")
         | 
| 73 76 | 
             
                }
         | 
| 74 77 | 
             
              end
         | 
| 75 | 
            -
             | 
| 76 78 | 
             
            end
         | 
    
        data/test/test_crosstab.rb
    CHANGED
    
    | @@ -1,63 +1,67 @@ | |
| 1 | 
            -
            require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
         | 
| 2 | 
            -
            class StatsampleCrosstabTestCase <  | 
| 3 | 
            -
             | 
| 1 | 
            +
            require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
         | 
| 2 | 
            +
            class StatsampleCrosstabTestCase < Minitest::Test
         | 
| 4 3 | 
             
              def initialize(*args)
         | 
| 5 | 
            -
                @v1 | 
| 6 | 
            -
                @v2 | 
| 7 | 
            -
                @ct=Statsample::Crosstab.new(@v1 | 
| 4 | 
            +
                @v1 = %w(black blonde black black red black brown black blonde black red black blonde).to_vector
         | 
| 5 | 
            +
                @v2 = %w(woman man man woman man man man woman man woman woman man man).to_vector
         | 
| 6 | 
            +
                @ct = Statsample::Crosstab.new(@v1, @v2)
         | 
| 8 7 | 
             
                super
         | 
| 9 8 | 
             
              end
         | 
| 9 | 
            +
             | 
| 10 10 | 
             
              def test_crosstab_errors
         | 
| 11 | 
            -
                e1 | 
| 11 | 
            +
                e1 = %w(black blonde black black red black brown black blonde black)
         | 
| 12 12 | 
             
                assert_raise ArgumentError do
         | 
| 13 | 
            -
                  Statsample::Crosstab.new(e1 | 
| 13 | 
            +
                  Statsample::Crosstab.new(e1, @v2)
         | 
| 14 14 | 
             
                end
         | 
| 15 | 
            -
                e2 | 
| 15 | 
            +
                e2 = %w(black blonde black black red black brown black blonde black black).to_vector
         | 
| 16 16 |  | 
| 17 17 | 
             
                assert_raise ArgumentError do
         | 
| 18 | 
            -
                  Statsample::Crosstab.new(e2 | 
| 18 | 
            +
                  Statsample::Crosstab.new(e2, @v2)
         | 
| 19 19 | 
             
                end
         | 
| 20 20 | 
             
                assert_nothing_raised do
         | 
| 21 | 
            -
                  Statsample::Crosstab.new(@v1 | 
| 21 | 
            +
                  Statsample::Crosstab.new(@v1, @v2)
         | 
| 22 22 | 
             
                end
         | 
| 23 23 | 
             
              end
         | 
| 24 | 
            +
             | 
| 24 25 | 
             
              def test_crosstab_basic
         | 
| 25 | 
            -
                assert_equal(%w | 
| 26 | 
            -
                assert_equal(%w | 
| 27 | 
            -
                assert_equal({'black'=>7,'blonde'=>3,'red'=>2,'brown'=>1}, @ct.rows_total)
         | 
| 28 | 
            -
                assert_equal({'man'=>8,'woman'=>5}, @ct.cols_total)
         | 
| 26 | 
            +
                assert_equal(%w(black blonde brown red), @ct.rows_names)
         | 
| 27 | 
            +
                assert_equal(%w(man woman), @ct.cols_names)
         | 
| 28 | 
            +
                assert_equal({ 'black' => 7, 'blonde' => 3, 'red' => 2, 'brown' => 1 }, @ct.rows_total)
         | 
| 29 | 
            +
                assert_equal({ 'man' => 8, 'woman' => 5 }, @ct.cols_total)
         | 
| 29 30 | 
             
              end
         | 
| 31 | 
            +
             | 
| 30 32 | 
             
              def test_crosstab_frequencies
         | 
| 31 | 
            -
                fq | 
| 32 | 
            -
                assert_equal(8,fq.size)
         | 
| 33 | 
            -
                sum=fq.inject(0) {|s,x| s+x[1]}
         | 
| 34 | 
            -
                assert_equal(13,sum)
         | 
| 35 | 
            -
                fr | 
| 36 | 
            -
                assert_equal(4,fr.size)
         | 
| 37 | 
            -
                assert_equal(%w | 
| 38 | 
            -
                fc | 
| 39 | 
            -
                assert_equal(2,fc.size)
         | 
| 40 | 
            -
                assert_equal(%w | 
| 41 | 
            -
                assert_equal(Matrix.rows([[3,4],[3,0],[1,0],[1,1]]) | 
| 33 | 
            +
                fq = @ct.frequencies
         | 
| 34 | 
            +
                assert_equal(8, fq.size)
         | 
| 35 | 
            +
                sum = fq.inject(0) { |s, x| s + x[1] }
         | 
| 36 | 
            +
                assert_equal(13, sum)
         | 
| 37 | 
            +
                fr = @ct.frequencies_by_row
         | 
| 38 | 
            +
                assert_equal(4, fr.size)
         | 
| 39 | 
            +
                assert_equal(%w(black blonde brown red), fr.keys.sort)
         | 
| 40 | 
            +
                fc = @ct.frequencies_by_col
         | 
| 41 | 
            +
                assert_equal(2, fc.size)
         | 
| 42 | 
            +
                assert_equal(%w(man woman), fc.keys.sort)
         | 
| 43 | 
            +
                assert_equal(Matrix.rows([[3, 4], [3, 0], [1, 0], [1, 1]]), @ct.to_matrix)
         | 
| 42 44 | 
             
              end
         | 
| 45 | 
            +
             | 
| 43 46 | 
             
              def test_summary
         | 
| 44 | 
            -
                @ct.percentage_row=true
         | 
| 45 | 
            -
                @ct.percentage_column=true
         | 
| 46 | 
            -
                @ct.percentage_total=true
         | 
| 47 | 
            -
                assert(@ct.summary.size>0)
         | 
| 47 | 
            +
                @ct.percentage_row = true
         | 
| 48 | 
            +
                @ct.percentage_column = true
         | 
| 49 | 
            +
                @ct.percentage_total = true
         | 
| 50 | 
            +
                assert(@ct.summary.size > 0)
         | 
| 48 51 | 
             
              end
         | 
| 52 | 
            +
             | 
| 49 53 | 
             
              def test_expected
         | 
| 50 | 
            -
                v1 | 
| 51 | 
            -
                v2 | 
| 52 | 
            -
                ct=Statsample::Crosstab.new(v1,v2)
         | 
| 53 | 
            -
                assert_equal(Matrix[[2.5,2.5],[2.5,2.5]],ct.matrix_expected)
         | 
| 54 | 
            +
                v1 = %w(1 1 1 1 1 0 0 0 0 0).to_vector
         | 
| 55 | 
            +
                v2 = %w(0 0 0 0 0 1 1 1 1 1).to_vector
         | 
| 56 | 
            +
                ct = Statsample::Crosstab.new(v1, v2)
         | 
| 57 | 
            +
                assert_equal(Matrix[[2.5, 2.5], [2.5, 2.5]], ct.matrix_expected)
         | 
| 54 58 | 
             
              end
         | 
| 59 | 
            +
             | 
| 55 60 | 
             
              def test_crosstab_with_scale
         | 
| 56 | 
            -
                v1 | 
| 57 | 
            -
                v2 | 
| 58 | 
            -
                ct=Statsample::Crosstab.new(v1,v2)
         | 
| 59 | 
            -
                assert_equal(Matrix[[0,5],[5,0]],ct.to_matrix)
         | 
| 60 | 
            -
                assert_nothing_raised { ct.summary } | 
| 61 | 
            +
                v1 = %w(1 1 1 1 1 0 0 0 0 0).to_scale
         | 
| 62 | 
            +
                v2 = %w(0 0 0 0 0 1 1 1 1 1).to_scale
         | 
| 63 | 
            +
                ct = Statsample::Crosstab.new(v1, v2)
         | 
| 64 | 
            +
                assert_equal(Matrix[[0, 5], [5, 0]], ct.to_matrix)
         | 
| 65 | 
            +
                assert_nothing_raised { ct.summary }
         | 
| 61 66 | 
             
              end
         | 
| 62 | 
            -
             | 
| 63 67 | 
             
            end
         |