RubyGems - statsample - Versions diffs - 0.6.1 → 0.6.2 - Mend

statsample 0.6.1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58)

data/History.txt +4 -0
data/Manifest.txt +8 -19
data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
data/demo/dominance_analysis_bootstrap.rb +20 -0
data/demo/dominanceanalysis.rb +11 -0
data/demo/multiple_regression.rb +40 -0
data/demo/polychoric.rb +13 -0
data/demo/tetrachoric.rb +10 -0
data/lib/distribution.rb +1 -0
data/lib/distribution/normalbivariate.rb +100 -0
data/lib/statsample.rb +4 -105
data/lib/statsample/bivariate.rb +5 -1
data/lib/statsample/bivariate/polychoric.rb +581 -0
data/lib/statsample/bivariate/tetrachoric.rb +37 -5
data/lib/statsample/converters.rb +11 -0
data/lib/statsample/dominanceanalysis.rb +104 -90
data/lib/statsample/dominanceanalysis/bootstrap.rb +160 -131
data/lib/statsample/factor/pca.rb +1 -2
data/lib/statsample/factor/principalaxis.rb +2 -2
data/lib/statsample/graph/svghistogram.rb +170 -172
data/lib/statsample/matrix.rb +79 -0
data/lib/statsample/mle.rb +6 -4
data/lib/statsample/mle/probit.rb +0 -1
data/lib/statsample/regression/multiple/alglibengine.rb +23 -23
data/lib/statsample/regression/multiple/baseengine.rb +112 -113
data/lib/statsample/regression/multiple/gslengine.rb +91 -94
data/lib/statsample/regression/multiple/rubyengine.rb +1 -1
data/lib/statsample/srs.rb +1 -1
data/lib/statsample/test.rb +0 -1
data/lib/statsample/test/umannwhitney.rb +8 -5
data/po/es/statsample.po +201 -39
data/po/statsample.pot +184 -32
data/test/test_bivariate.rb +21 -2
data/test/test_distribution.rb +58 -40
data/test/test_factor.rb +0 -1
data/test/test_gsl.rb +13 -14
data/test/test_regression.rb +1 -1
data/test/test_statistics.rb +1 -4
metadata +10 -21
data/demo/benchmark.rb +0 -76
data/demo/chi-square.rb +0 -44
data/demo/crosstab.rb +0 -7
data/demo/dice.rb +0 -13
data/demo/distribution_t.rb +0 -95
data/demo/graph.rb +0 -9
data/demo/item_analysis.rb +0 -30
data/demo/mean.rb +0 -81
data/demo/nunnally_6.rb +0 -34
data/demo/pca.rb +0 -29
data/demo/proportion.rb +0 -57
data/demo/regression.rb +0 -82
data/demo/sample_test.csv +0 -113
data/demo/spss_matrix.rb +0 -3
data/demo/strata_proportion.rb +0 -152
data/demo/stratum.rb +0 -141
data/demo/t-student.rb +0 -17
data/demo/umann.rb +0 -8
data/lib/matrix_extension.rb +0 -92

data/test/test_bivariate.rb CHANGED Viewed

@@ -24,6 +24,24 @@ class StatsampleBivariateTestCase < Test::Unit::TestCase
         assert_in_delta(tcm_obs[i,j], tcm_exp[i,k], 0.00001)
       end
     end
+  end
+  def test_polychoric
+    # Should be the same results as Tetrachoric for 2x2 matrix
+    matrix=Matrix[[rand(100)+10,rand(100)+10],[rand(100)+10,rand(100)+10]]
+    tetra = Statsample::Bivariate::Tetrachoric.new_with_matrix(matrix)
+    poly  = Statsample::Bivariate::Polychoric.new(matrix)
+    assert_in_delta(tetra.r,poly.r,0.0001)
+    # Example for http://www.john-uebersax.com/stat/tetra.htm#exampl
+    matrix=Matrix[[58,52,1],[26,58,3],[8,12,9]]
+    poly=Statsample::Bivariate::Polychoric.new(matrix)
+    assert_in_delta(0.4199, poly.r, 0.0001)
+    assert_in_delta(-0.2397, poly.threshold_y[0],0.001)
+    assert_in_delta(-0.0276, poly.threshold_x[0],0.001)
   end
   def test_tetrachoric
     a,b,c,d=0,0,0,0
@@ -51,8 +69,9 @@ class StatsampleBivariateTestCase < Test::Unit::TestCase
     tc  = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
     assert_in_delta(-0.53980,tc.r,0.0001)
     assert_in_delta(0.09940,tc.se,0.0001)
-    assert_in_delta(0.31864,tc.threshold_x,0.0001)
-    assert_in_delta(-0.15731,tc.threshold_y,0.0001)
+    assert_in_delta(-0.15731,tc.threshold_x, 0.0001)
+    assert_in_delta(0.31864,tc.threshold_y, 0.0001)
     x=%w{a a a a b b b a b b a a b b}.to_vector
     y=%w{0 0 1 1 0 0 1 1 1 1 0 0 1 1}.to_vector
     # crosstab

data/test/test_distribution.rb CHANGED Viewed

@@ -8,51 +8,69 @@ rescue LoadError
     NOT_GSL=true
 end
 class DistributionTestCase < Test::Unit::TestCase
-    def test_chi
-        if !NOT_GSL
-        [2,3,4,5].each{|k|
-            chis=rand()*10
-            area=Distribution::ChiSquare.cdf(chis, k)
-            assert_in_delta(area, GSL::Cdf.chisq_P(chis,k),0.0001)
-            assert_in_delta(chis, Distribution::ChiSquare.p_value(area,k),0.0001,"Error on prob #{area} and k #{k}")
-        }
-        end
+  def test_chi
+    if !NOT_GSL
+    [2,3,4,5].each{|k|
+        chis=rand()*10
+        area=Distribution::ChiSquare.cdf(chis, k)
+        assert_in_delta(area, GSL::Cdf.chisq_P(chis,k),0.0001)
+        assert_in_delta(chis, Distribution::ChiSquare.p_value(area,k),0.0001,"Error on prob #{area} and k #{k}")
+    }
     end
-    def test_t
-        if !NOT_GSL
-            [-2,0.1,0.5,1,2].each{|t|
-                [2,5,10].each{|n|
-                    area=Distribution::T.cdf(t,n)
-                    assert_in_delta(area, GSL::Cdf.tdist_P(t,n),0.0001)
-                    assert_in_delta(Distribution::T.p_value(area,n), GSL::Cdf.tdist_Pinv(area,n),0.0001)
-                }
+  end
+  def test_t
+    if !NOT_GSL
+        [-2,0.1,0.5,1,2].each{|t|
+            [2,5,10].each{|n|
+                area=Distribution::T.cdf(t,n)
+                assert_in_delta(area, GSL::Cdf.tdist_P(t,n),0.0001)
+                assert_in_delta(Distribution::T.p_value(area,n), GSL::Cdf.tdist_Pinv(area,n),0.0001)
             }
-        end
+        }
     end
-    def test_normal
-        if !NOT_GSL
-            [-2,0.1,0.5,1,2].each{|x|
-                area=Distribution::Normal.cdf(x)
-                assert_in_delta(area, GSL::Cdf.ugaussian_P(x),0.0001)
-                assert_in_delta(Distribution::Normal.p_value(area), GSL::Cdf.ugaussian_Pinv(area),0.0001)
-                assert_in_delta(Distribution::Normal.pdf(x), GSL::Ran::ugaussian_pdf(x),0.0001)
-            }
-        end
+  end
+  def test_normal
+    if !NOT_GSL
+        [-2,0.1,0.5,1,2].each{|x|
+            area=Distribution::Normal.cdf(x)
+            assert_in_delta(area, GSL::Cdf.ugaussian_P(x),0.0001)
+            assert_in_delta(Distribution::Normal.p_value(area), GSL::Cdf.ugaussian_Pinv(area),0.0001)
+            assert_in_delta(Distribution::Normal.pdf(x), GSL::Ran::ugaussian_pdf(x),0.0001)
+        }
     end
-    def test_f
-        if !NOT_GSL
-            [0.1,0.5,1,2,10,20,30].each{|f|
-                [2,5,10].each{|n2|
-                [2,5,10].each{|n1|
-                    area=Distribution::F.cdf(f,n1,n2)
-                    assert_in_delta(area, GSL::Cdf.fdist_P(f,n1,n2),0.0001)
-                    assert_in_delta(Distribution::F.p_value(area,n1,n2), GSL::Cdf.fdist_Pinv(area,n1,n2),0.0001)
-                }
-                }
+  end
+  def test_normal_bivariate
+    if !NOT_GSL
+      [0.2,0.4,0.6,0.8,0.9, 0.99,0.999,0.999999].each {|rho|
+        assert_equal(GSL::Ran::bivariate_gaussian_pdf(0, 0, 1,1,rho), Distribution::NormalBivariate.pdf(0,0, rho , 1,1))
+      }
+    end
+    [-3,-2,-1,0,1,1.5].each {|x|
+      assert_in_delta(Distribution::NormalBivariate.cdf_math(x,x,0.5), Distribution::NormalBivariate.cdf_iterate(x,x,0.5), 0.001)
+    }
+    assert_in_delta(0.686, Distribution::NormalBivariate.cdf(2,0.5,0.5), 0.001)
+    assert_in_delta(0.498, Distribution::NormalBivariate.cdf(2,0.0,0.5), 0.001)
+    assert_in_delta(0.671, Distribution::NormalBivariate.cdf(1.5,0.5,0.5), 0.001)
+    assert_in_delta(Distribution::Normal.cdf(0), Distribution::NormalBivariate.cdf(10,0,0.9), 0.001)
+  end
+  def test_f
+    if !NOT_GSL
+        [0.1,0.5,1,2,10,20,30].each{|f|
+            [2,5,10].each{|n2|
+            [2,5,10].each{|n1|
+                area=Distribution::F.cdf(f,n1,n2)
+                assert_in_delta(area, GSL::Cdf.fdist_P(f,n1,n2),0.0001)
+                assert_in_delta(Distribution::F.p_value(area,n1,n2), GSL::Cdf.fdist_Pinv(area,n1,n2),0.0001)
+            }
             }
-        end
+        }
     end
+  end
 end

data/test/test_factor.rb CHANGED Viewed

@@ -1,7 +1,6 @@
 $:.unshift(File.dirname(__FILE__)+'/../lib/')
 require 'statsample'
 require 'test/unit'
-require 'matrix_extension'
 class StatsampleFactorTestCase < Test::Unit::TestCase
     def test_pca
       require 'gsl'

data/test/test_gsl.rb CHANGED Viewed

@@ -1,22 +1,21 @@
 $:.unshift(File.dirname(__FILE__)+'/../lib/')
 require 'statsample'
 require 'test/unit'
-require 'matrix_extension'
 class StatsampleGSLTestCase < Test::Unit::TestCase
-    def test_matrix_to_gsl
-        if HAS_GSL
-        a=[1,2,3,4,20].to_vector(:scale)
-        b=[3,2,3,4,50].to_vector(:scale)
-        c=[6,2,3,4,3].to_vector(:scale)
-        ds={'a'=>a,'b'=>b,'c'=>c}.to_dataset
-        gsl=ds.to_matrix.to_gsl
-        assert_equal(5,gsl.size1)
-        assert_equal(3,gsl.size2)
-        matrix=gsl.to_matrix
-        assert_equal(5,matrix.row_size)
-        assert_equal(3,matrix.column_size)
-        end
+  def test_matrix_to_gsl
+    if HAS_GSL
+      a=[1,2,3,4,20].to_vector(:scale)
+      b=[3,2,3,4,50].to_vector(:scale)
+      c=[6,2,3,4,3].to_vector(:scale)
+      ds={'a'=>a,'b'=>b,'c'=>c}.to_dataset
+      gsl=ds.to_matrix.to_gsl
+      assert_equal(5,gsl.size1)
+      assert_equal(3,gsl.size2)
+      matrix=gsl.to_matrix
+      assert_equal(5,matrix.row_size)
+      assert_equal(3,matrix.column_size)
     end
+  end
 end

data/test/test_regression.rb CHANGED Viewed

@@ -50,7 +50,7 @@ class StatsampleRegressionTestCase < Test::Unit::TestCase
             assert_in_delta(residuals[i],c_residuals[i],0.001)
         }
 	else
-		puts "Regression::Multiple::AlglibEngine not tested (no Alglib)"
+		puts "Regression::Multiple::GslEngine not tested (no Gsl)"
 	end
     end

data/test/test_statistics.rb CHANGED Viewed

@@ -24,13 +24,10 @@ class StatsampleStatisicsTestCase < Test::Unit::TestCase
   end
   def test_chi_square
-    assert_raise TypeError do
-        Statsample::Test.chi_square(1,1)
-    end
     real=Matrix[[95,95],[45,155]]
     expected=Matrix[[68,122],[72,128]]
     assert_nothing_raised do
-        chi=Statsample::Test.chi_square(real,expected)
+      chi=Statsample::Test.chi_square(real,expected)
     end
     chi=Statsample::Test.chi_square(real,expected)
     assert_in_delta(32.53,chi,0.1)

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: statsample
 version: !ruby/object:Gem::Version
-  version: 0.6.1
+  version: 0.6.2
 platform: ruby
 authors:
 - Claudio Bustos
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2010-02-08 00:00:00 -03:00
+date: 2010-02-11 00:00:00 -03:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -76,35 +76,23 @@ files:
 - data/test_binomial.csv
 - data/tetmat_matrix.txt
 - data/tetmat_test.txt
-- demo/benchmark.rb
-- demo/chi-square.rb
-- demo/crosstab.rb
-- demo/dice.rb
-- demo/distribution_t.rb
-- demo/graph.rb
-- demo/item_analysis.rb
-- demo/mean.rb
-- demo/nunnally_6.rb
-- demo/pca.rb
-- demo/proportion.rb
-- demo/regression.rb
-- demo/sample_test.csv
-- demo/spss_matrix.rb
-- demo/strata_proportion.rb
-- demo/stratum.rb
-- demo/t-student.rb
-- demo/umann.rb
+- demo/dominance_analysis_bootstrap.rb
+- demo/dominanceanalysis.rb
+- demo/multiple_regression.rb
+- demo/polychoric.rb
+- demo/tetrachoric.rb
 - lib/distribution.rb
 - lib/distribution/chisquare.rb
 - lib/distribution/f.rb
 - lib/distribution/normal.rb
+- lib/distribution/normalbivariate.rb
 - lib/distribution/t.rb
-- lib/matrix_extension.rb
 - lib/spss.rb
 - lib/statistics2.rb
 - lib/statsample.rb
 - lib/statsample/anova.rb
 - lib/statsample/bivariate.rb
+- lib/statsample/bivariate/polychoric.rb
 - lib/statsample/bivariate/tetrachoric.rb
 - lib/statsample/codification.rb
 - lib/statsample/combination.rb
@@ -127,6 +115,7 @@ files:
 - lib/statsample/graph/svgscatterplot.rb
 - lib/statsample/histogram.rb
 - lib/statsample/htmlreport.rb
+- lib/statsample/matrix.rb
 - lib/statsample/mle.rb
 - lib/statsample/mle/logit.rb
 - lib/statsample/mle/normal.rb

data/demo/benchmark.rb DELETED Viewed

@@ -1,76 +0,0 @@
-$:.unshift(File.dirname(__FILE__)+'/../lib/')
-require 'statsample'
-require 'benchmark'
-v=(0..10000).collect{|n|
-	r=rand(100)
-    if(r<90)
-    r
-    else
-        nil
-    end
-}.to_vector
-v.missing_values=[5,10,20]
-v.type=:scale
-a=[]
-b=[]
-c=[]
-(0..1000).each{|i|
-    a.push(rand())
-    b.push(rand())
-    c.push(rand())
-}
-ds=Statsample::Dataset.new({'a'=>a.to_vector(:scale),'b'=>b.to_vector(:scale), 'c'=>c.to_vector(:scale)})
- n = 300
- if (false)
-     Benchmark.bm(7) do |bench|
-         bench.report("missing or")   { for i in 1..n; v.each {|x|             !(x.nil? or v.missing_values.include? x) }; end }
-         bench.report("missing and")   { for i in 1..n;v.each {|x|             !x.nil? and !v.missing_values.include? x } ; end }
-    end
- end
- if (false)
-     Benchmark.bm(7) do |bench|
-         bench.report("true")   { Statsample::OPTIMIZED=true; for i in 1..n; v.set_valid_data ; end }
-         bench.report("false")   { Statsample::OPTIMIZED=false; for i in 1..n; v.set_valid_data ; end }
-    end
- end
- if (true)
-     Benchmark.bm(7) do |x|
-         x.report("Alglib coeffs")   { for i in 1..n; lr=Statsample::Regression::Multiple::AlglibEngine.new(ds,"c"); lr.coeffs;          lr=nil;end }
-         x.report("GslEngine coeffs")   { for i in 1..n; lr=Statsample::Regression::Multiple::GslEngine.new(ds,"c"); lr.coeffs;lr=nil; end }
-     end
- end
- if(true)
-     Benchmark.bm(7) do |x|
-         x.report("Alglib process")   { for i in 1..n; lr=Statsample::Regression::Multiple::AlglibEngine.new(ds,"c"); lr.process([rand(10),rand(10)]); end }
-         x.report("GslEngine process")   { for i in 1..n; lr=Statsample::Regression::Multiple::GslEngine.new(ds,"c"); lr.process([rand(10),rand(10)]); end }
-    end
- end
- if (false)
-    Benchmark.bm(7) do |x|
-		x.report("mean")   { for i in 1..n; v.mean; end }
-		x.report("slow_mean")   { for i in 1..n; v.mean_slow; end }
-    end
-    Benchmark.bm(7) do |x|
-		x.report("variance_sample")   { for i in 1..n; v.variance_sample; end }
-		x.report("variance_slow")   { for i in 1..n; v.slow_variance_sample; end }
-    end
-    Benchmark.bm(7) do |x|
-		x.report("Nominal.frequencies")   { for i in 1..n; v.frequencies; end }
-		x.report("Nominal.frequencies_slow")   { for i in 1..n; v.frequencies_slow; end }
-		x.report("_frequencies")   { for i in 1..n; Statsample._frequencies(v.valid_data); end }
-    end
-end

data/demo/chi-square.rb DELETED Viewed

@@ -1,44 +0,0 @@
-require File.dirname(__FILE__)+'/../lib/statsample'
-require 'rbgsl'
-require 'statsample/resample'
-require 'statsample/test'
-require 'matrix'
-ideal=Matrix[[30,30,40]]
-tests=10000
-monte=Statsample::Resample.repeat_and_save(tests) {
-	observed=[0,0,0]
-	(1..100).each{|i|
-		r=rand(100)
-		if r<30
-			observed[0]+=1
-		elsif r<60
-			observed[1]+=1
-		else
-			observed[2]+=1
-		end
-	}
-	Statsample::Test::chi_square(Matrix[observed],ideal)
-}
-v=monte.to_vector(:scale)
-x=[]
-y=[]
-y2=[]
-y3=[]
-y4=[]
-prev=0
-prev_chi=0
-v.frequencies.sort.each{|k,v1|
-	x.push(k)
-	y.push(prev+v1)
-	prev=prev+v1
-	cdf_chi=GSL::Cdf.chisq_P(k,2)
-	y2.push(cdf_chi)
-	y4.push(prev.quo(tests))
-}
-GSL::graph(GSL::Vector.alloc(x), GSL::Vector.alloc(y2), GSL::Vector.alloc(y4))

data/demo/crosstab.rb DELETED Viewed

@@ -1,7 +0,0 @@
-require './../lib/statsample'
-a=[1,1,1,1,1,1,1,2,2,2,2,2,3,3,3].to_vector
-b=[1,2,3,2,2,2,1,1,1,2,2,1,2,2,3].to_vector
-ct=Statsample::Crosstab.new(a,b)
-puts ct.summary

data/demo/dice.rb DELETED Viewed

@@ -1,13 +0,0 @@
-require File.dirname(__FILE__)+"/../lib/statsample"
-require 'statsample/srs'
-require 'statsample/resample'
-require 'gnuplot'
-tests=3000
-# rand a 50%
-monte_with=Statsample::Resample.repeat_and_save(tests) {
-    (1+rand(6))+(1+rand(6))
-}.to_vector(:scale)
-p monte_with.mean

data/demo/distribution_t.rb DELETED Viewed

@@ -1,95 +0,0 @@
-#!/usr/bin/ruby
-require File.dirname(__FILE__)+"/../lib/statsample"
-require 'statsample/resample'
-require 'gnuplot'
-r = GSL::Rng.alloc(GSL::Rng::TAUS, 1)
-v=[]
-population_size=10000
-population_size.times{|i|
-    v.push(r.ugaussian)
-}
-v=v.to_vector(:scale)
-vm=v.mean
-vsd=v.sdp
-puts "Population sd:#{v.sdp}"
-tests=3000
-Gnuplot.open do |gp|
-			Gnuplot::Plot.new( gp ) do |plot|
-			plot.boxwidth("0.9 absolute")
-			plot.xrange("[-3:3]")
-			plot.yrange("[0:1.1]")
-			plot.style("fill  solid 1.00 border -1")
-[2].each {|ss|
-    puts "Sample size:#{ss}"
-    ee=v.sdp.quo(Math::sqrt(ss))
-    puts "SE: #{ee}"
-    puts "Expected variance with replacement: #{v.variance_population.quo(ss)*(v.size-1).quo(v.size)}"
-    puts "Expected variance without replacement: #{v.variance_population.quo(ss)*(1-ss.quo(v.size))}"
-	sample_size=ss
-    sds_prom=[]
-    sds_prom_wo=[]
-	monte_wr=Statsample::Resample.repeat_and_save(tests) {
-		sample=v.sample_with_replacement(sample_size)
-        sds_prom.push(sample.sds)
-		sample.mean
-	}
-    monte_wor=Statsample::Resample.repeat_and_save(tests) {
-		sample=v.sample_without_replacement(sample_size)
-        sds_prom_wo.push(sample.sds)
-		sample.mean
-	}
-    xxz=[]
-    xxt=[]
-	xa=[]
-	xy=[]
-	xt=[]
-	xz=[]
-    s_wr=sds_prom.to_vector(:scale).mean
-    s_wor=sds_prom_wo.to_vector(:scale).mean
-	mw=monte_wr.to_vector(:scale)
-    mwo=monte_wor.to_vector(:scale)
-    puts "Sample variance with replacement: #{mw.variance_population}"
-    puts "Sample variance without replacement: #{monte_wor.to_vector(:scale).variance_population}"
-    puts "Mean sd estimadet :#{vsd*Math::sqrt(ss-1)}"
-    puts "Mean Sd W/R: #{s_wr}"
-    puts "Mean Sd WO/R: #{s_wor}"
-	mx=mw.mean
-    er=mw.sds
-	prev=0
-	mw.frequencies.sort.each{|x,y|
-        t=(x-vm).quo(s_wr.quo(Math::sqrt(ss))*s_wr.quo(ss-1))
-        z=(x-vm).quo(vsd.quo(Math::sqrt(ss)))
-		xxz.push(z)
-        xxt.push(t)
-		prev+=y
-		xy.push(prev.to_f/tests)
-		xt.push(Distribution::T.cdf(t, ss-1))
-		xz.push(Distribution::Normal.cdf(z))
-	}
-	plot.data << Gnuplot::DataSet.new( [xxt,xy] ) do |ds|
-		ds.with="lines"
-		ds.title = "sim #{sample_size}"
-	end
-	plot.data << Gnuplot::DataSet.new( [xxt,xt] ) do |ds|
-		ds.with="lines"
-		ds.title = "t #{sample_size}"
-	end
-	plot.data << Gnuplot::DataSet.new( [xxz,xz] ) do |ds|
-		ds.with="lines"
-		ds.title = "z"
-	end
-}
-end
-end