RubyGems - statsample - Versions diffs - 0.6.4 → 0.6.5 - Mend

statsample 0.6.4 → 0.6.5

Files changed (20) hide show

data/History.txt +9 -0
data/README.txt +8 -4
data/demo/polychoric.rb +1 -2
data/lib/distribution/normalbivariate.rb +5 -5
data/lib/statsample.rb +1 -1
data/lib/statsample/bivariate/polychoric.rb +78 -1
data/lib/statsample/converters.rb +1 -1
data/lib/statsample/dominanceanalysis.rb +3 -2
data/lib/statsample/regression/multiple/baseengine.rb +1 -1
data/lib/statsample/regression/multiple/matrixengine.rb +58 -16
data/test/test_bivariate.rb +65 -38
data/test/test_csv.rb +5 -4
data/test/test_dataset.rb +4 -4
data/test/test_factor.rb +35 -27
data/test/test_matrix.rb +0 -1
data/test/test_regression.rb +4 -2
data/test/test_svg_graph.rb +8 -9
data/test/test_vector.rb +4 -4
data/test/test_xls.rb +4 -4
metadata +18 -18

data/History.txt CHANGED Viewed

@@ -1,3 +1,12 @@
+=== 0.6.5 / 2010-02-24
+* Bug fix on test: Use tempfile instead of tempdir
+* Multiple Regression: Calculation of constant standard error, using covariance matrix.
+* Calculation of R^2_yx and P^2_yx for Regression on Multiple Dependent variables
+* Dominance Analysis could use Correlation or Covariance Matrix as input.
+* Dominance Analysis extension to multiple dependent variables (Azen & Budescu, 2006)
+* Two-step estimate of Polychoric correlation uses the minimization gem, so it can be executed without rb-gsl
 === 0.6.4 / 2010-02-19
 * Dominance Analysis and Dominance Analysis Bootstrap allows multivariate dependent analysis.
 * Test suite for Dominance Analysis, using Azen and Budescu papers as references

data/README.txt CHANGED Viewed

@@ -3,9 +3,11 @@
 http://ruby-statsample.rubyforge.org/
-== FEATURES:
+== DESCRIPTION:
+A suite for basic and advanced statistics on Ruby. Tested on Ruby 1.8.7, Ruby 1.9 and JRuby 1.4 (Ruby 1.8.7 compatible)
-A suite for basic and advanced statistics. Includes:
+Includes:
 * Descriptive statistics: frequencies, median, mean, standard error, skew, kurtosis (and many others).
 * Imports and exports datasets from and to Excel, CSV and plain text files.
 * Correlations: Pearson (r), Rho, Tetrachoric, Polychoric
@@ -14,7 +16,7 @@ A suite for basic and advanced statistics. Includes:
 * Dominance Analysis, with multivariate dependent and bootstrap (Azen & Budescu)
 * Sample calculation related formulas
-== DETAILED FEATURES:
+== FEATURES:
 * Factorial Analysis. Principal Component Analysis and Principal Axis extraction, with orthogonal rotations (Varimax, Equimax, Quartimax)
 * Multiple Regression. Listwise analysis optimized with use of Alglib library. Pairwise analysis is executed on pure ruby with matrixes and reports same values as SPSS
@@ -66,7 +68,9 @@ A suite for basic and advanced statistics. Includes:
 Optional:
 * Plotting: gnuplot and rbgnuplot, SVG::Graph
-* Factorial analysis and polychorical correlation: gsl library and rb-gsl (http://rb-gsl.rubyforge.org/). You should install it using <tt>gem install gsl</tt>
+* Factorial analysis and polychoric correlation (joint estimate and polychoric series): gsl library and rb-gsl (http://rb-gsl.rubyforge.org/). You should install it using <tt>gem install gsl</tt>.
+<b>Note</b>: Use gsl 1.12.109 or later.
 == DOWNLOAD
 * Gems and bugs report: http://rubyforge.org/projects/ruby-statsample/

data/demo/polychoric.rb CHANGED Viewed

@@ -5,10 +5,9 @@ require 'statsample'
 ct=Matrix[[58,52,1],[26,58,3],[8,12,9]]
 # Estimation of polychoric correlation using two-step (default)
-poly=Statsample::Bivariate::Polychoric.new(ct, :name=>"Polychoric with two-step")
+poly=Statsample::Bivariate::Polychoric.new(ct, :name=>"Polychoric with two-step", :debug=>true)
 puts poly.summary
 # Estimation of polychoric correlation using joint method (slow)
 poly=Statsample::Bivariate::Polychoric.new(ct, :method=>:joint, :name=>"Polychoric with joint")
 puts poly.summary

data/lib/distribution/normalbivariate.rb CHANGED Viewed

@@ -19,10 +19,9 @@ module Distribution
       # Probability density function for a given x, y and rho value.
       #
       # Source: http://en.wikipedia.org/wiki/Multivariate_normal_distribution
-      def pdf(x,y, rho, sigma1=1.0, sigma2=1.0)
-        (1.quo(2 * Math::PI * sigma1*sigma2 * Math::sqrt( 1 - rho**2 ))) *
-          Math::exp(-(1.quo(2*(1-rho**2))) *
-          ((x**2/sigma1) + (y**2/sigma2) - (2*rho*x*y).quo(sigma1*sigma2)  ))
+      def pdf(x,y, rho, s1=1.0, s2=1.0)
+        1.quo(2 * Math::PI * s1 * s2 * Math::sqrt( 1 - rho**2 )) * (Math::exp(-(1.quo(2*(1-rho**2))) *
+          ((x**2.quo(s1)) + (y**2.quo(s2)) - (2*rho*x*y).quo(s1*s2))))
       end
       def f(x,y,aprime,bprime,rho)
@@ -147,7 +146,8 @@ module Distribution
         r=rho
         twopi = 6.283185307179586
-        w=11.times.collect {[nil]*4}; x=11.times.collect {[nil]*4}
+        w=11.times.collect {[nil]*4};
+        x=11.times.collect {[nil]*4}
         data=[
         0.1713244923791705E+00, -0.9324695142031522E+00,

data/lib/statsample.rb CHANGED Viewed

@@ -109,7 +109,7 @@ end
 # * Dataset: An union of vectors.
 #
 module Statsample
-  VERSION = '0.6.4'
+  VERSION = '0.6.5'
   SPLIT_TOKEN = ","
   autoload(:Database, 'statsample/converters')
   autoload(:Anova, 'statsample/anova')

data/lib/statsample/bivariate/polychoric.rb CHANGED Viewed

@@ -1,3 +1,4 @@
+require 'minimization'
 module Statsample
   module Bivariate
     # Calculate Polychoric correlation for two vectors.
@@ -79,7 +80,7 @@ module Statsample
       METHOD=:two_step
       MAX_ITERATIONS=300
-      EPSILON=0.000001
+      EPSILON=1e-6
       MINIMIZER_TYPE_TWO_STEP="brent"
       MINIMIZER_TYPE_JOINT="nmsimplex"
       def new_with_vectors(v1,v2)
@@ -184,6 +185,52 @@ module Statsample
       def chi_square_df
         (@nr*@nc)-@nc-@nr
       end
+      def loglike_fd_rho(alpha,beta,rho)
+        if rho.abs>0.9999
+          rho= (rho>0) ? 0.9999 : -0.9999
+        end
+         #puts "rho: #{rho}"
+        loglike=0
+        pd=@nr.times.collect{ [0]*@nc}
+        pc=@nr.times.collect{ [0]*@nc}
+        @nr.times { |i|
+          @nc.times { |j|
+            if i==@nr-1 and j==@nc-1
+              pd[i][j]=1.0
+              a=100
+              b=100
+            else
+              a=(i==@nr-1) ? 100: alpha[i]
+              b=(j==@nc-1) ? 100: beta[j]
+              pd[i][j]=Distribution::NormalBivariate.cdf(a, b, rho)
+            end
+            pc[i][j] = pd[i][j]
+            pd[i][j] = pd[i][j] - pc[i-1][j] if i>0
+            pd[i][j] = pd[i][j] - pc[i][j-1] if j>0
+            pd[i][j] = pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
+            pij= pd[i][j]+EPSILON
+            if i==0
+              alpha_m1=-10
+            else
+              alpha_m1=alpha[i-1]
+            end
+            if j==0
+              beta_m1=-10
+            else
+              beta_m1=beta[j-1]
+            end
+            loglike+= (@matrix[i,j].quo(pij))*(Distribution::NormalBivariate.pdf(a,b,rho) - Distribution::NormalBivariate.pdf(alpha_m1, b,rho) - Distribution::NormalBivariate.pdf(a, beta_m1,rho) + Distribution::NormalBivariate.pdf(alpha_m1, beta_m1,rho) )
+          }
+        }
+        #puts "derivative: #{loglike}"
+        -loglike
+      end
       def loglike(alpha,beta,rho)
         if rho.abs>0.9999
           rho= (rho>0) ? 0.9999 : -0.9999
@@ -249,6 +296,8 @@ module Statsample
           ac=@sumcac[i]
         end
       end
      # Computation of polychoric correlation using two-step ML estimation.
       #
       # Two-step ML estimation "first estimates the thresholds from the one-way marginal frequencies, then estimates rho, conditional on these thresholds, via maximum likelihood" (Uebersax, 2006).
@@ -260,6 +309,34 @@ module Statsample
       # * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February, 11, 2010
       #
       def compute_two_step_mle_drasgow
+        if HAS_GSL
+          compute_two_step_mle_drasgow_gsl
+        else
+          compute_two_step_mle_drasgow_ruby
+        end
+      end
+      # Depends on minimization algorithm.
+      def compute_two_step_mle_drasgow_ruby #:nodoc:
+        f=proc {|rho|
+          loglike(@alpha,@beta, rho)
+        }
+        @log="Minimizing using GSL Brent method\n"
+        min=Minimization::Brent.new(-0.9999,0.9999,f)
+        min.epsilon=@epsilon
+        min.expected=0
+        min.iterate
+        @log+=min.log
+        @r=min.x_minimum
+        @loglike_model=-min.f_minimum
+        puts @log if @debug
+      end
+      def compute_two_step_mle_drasgow_gsl #:nodoc:
         fn1=GSL::Function.alloc {|rho|
           loglike(@alpha,@beta, rho)

data/lib/statsample/converters.rb CHANGED Viewed

@@ -86,7 +86,7 @@ module Statsample
           end
         }
 =end
-raise "Should'nt be empty headers: [#{row.to_a.join(",")}]" if row.to_a.find_all {|c| c.nil?}.count>0
+raise "Should'nt be empty headers: [#{row.to_a.join(",")}]" if row.to_a.find_all {|c| c.nil?}.size>0
         fields=row.to_a.collect{|c| c.downcase}
         fields.recode_repeated
       end

data/lib/statsample/dominanceanalysis.rb CHANGED Viewed

@@ -112,6 +112,7 @@ module Statsample
         @method_association=:r2
       end
+      @name=nil
       opts.each{|k,v|
         self.send("#{k}=",v) if self.respond_to? k
       }
@@ -133,7 +134,7 @@ module Statsample
         raise ArgumentError.new("You should use a Matrix or a Dataset")
       end
       @models=nil
+      @models_data=nil
     end
     # Compute models.
     def compute
@@ -227,7 +228,7 @@ module Statsample
             dominances.push(0)
         else
           return 0.5
-            dominances.push(0.5)
+            #dominances.push(0.5)
         end
       end
       final=dominances.uniq

data/lib/statsample/regression/multiple/baseengine.rb CHANGED Viewed

@@ -125,7 +125,7 @@ module Statsample
           out={}
           mse=sse.quo(df_e)
           coeffs.each {|k,v|
-            out[k]=Math::sqrt(mse/(@ds[k].sum_of_squares*tolerance(k)))
+            out[k]=Math::sqrt(mse/(@ds[k].sum_of_squares * tolerance(k)))
           }
           out
         end

data/lib/statsample/regression/multiple/matrixengine.rb CHANGED Viewed

@@ -35,22 +35,36 @@ class MatrixEngine < BaseEngine
   def initialize(matrix,y_var, opts=Hash.new)
     matrix.extend Statsample::CovariateMatrix
     raise "#{y_var} variable should be on data" unless matrix.fields.include? y_var
-    @matrix_cor=matrix.correlation
+    if matrix.type==:covariance
+      @matrix_cov=matrix
+      @matrix_cor=matrix.correlation
+      @no_covariance=false
+    else
+      @matrix_cor=matrix
+      @matrix_cov=matrix
+      @no_covariance=true
+    end
     @y_var=y_var
     @fields=matrix.fields-[y_var]
     @n_predictors=@fields.size
-    @matrix=matrix
-    @matrix_x= matrix.submatrix(@fields)
-    @matrix_y = matrix.submatrix(@fields, [y_var])
-    @matrix_y_cor=@matrix_cor.submatrix(@fields, [y_var])
-    @result_matrix=@matrix_x.inverse * @matrix_y
-    @y_sd=Math::sqrt(@matrix.submatrix([y_var])[0,0])
-    @x_sd=@matrix_x.row_size.times.inject({}) {|ac,i|
-      ac[@matrix_x.fields[i]]=Math::sqrt(@matrix_x[i,i])
+    @matrix_x= @matrix_cor.submatrix(@fields)
+    @matrix_x_cov= @matrix_cov.submatrix(@fields)
+    @matrix_y = @matrix_cor.submatrix(@fields, [y_var])
+    @matrix_y_cov = @matrix_cov.submatrix(@fields, [y_var])
+    @y_sd=Math::sqrt(@matrix_cov.submatrix([y_var])[0,0])
+    @x_sd=@n_predictors.times.inject({}) {|ac,i|
+      ac[@matrix_x_cov.fields[i]]=Math::sqrt(@matrix_x_cov[i,i])
       ac;
     }
     @cases=nil
     @x_mean=@fields.inject({}) {|ac,f|
       ac[f]=0.0
@@ -64,13 +78,15 @@ class MatrixEngine < BaseEngine
     opts.each{|k,v|
         self.send("#{k}=",v) if self.respond_to? k
     }
+      result_matrix=@matrix_x_cov.inverse * @matrix_y_cov
     if matrix.type==:covariance
-      @coeffs=@result_matrix.column(0).to_a
+      @coeffs=result_matrix.column(0).to_a
       @coeffs_stan=coeffs.collect {|k,v|
         coeffs[k]*@x_sd[k].quo(@y_sd)
       }
     else
-      @coeffs_stan=@result_matrix.column(0).to_a
+      @coeffs_stan=result_matrix.column(0).to_a
       @coeffs=standarized_coeffs.collect {|k,v|
         standarized_coeffs[k]*@y_sd.quo(@x_sd[k])
@@ -87,8 +103,7 @@ class MatrixEngine < BaseEngine
   # * 1-(|R| / |R_x|) or
   # * Sum(b_i*r_yi)
   def r2
-    @n_predictors.times.inject(0) {|ac,i| ac+@coeffs_stan[i]* @matrix_y_cor[i,0]}
-    #1-(@matrix.correlation.determinant.quo(@matrix_x.correlation.determinant))
+    @n_predictors.times.inject(0) {|ac,i| ac+@coeffs_stan[i]* @matrix_y[i,0]}
   end
   def r
     Math::sqrt(r2)
@@ -141,9 +156,36 @@ class MatrixEngine < BaseEngine
     }
     out
   end
-# Standard error for constant
+  def constant_t
+    return nil if constant_se.nil?
+    constant.to_f/constant_se
+  end
+  # Standard error for constant.
+  # Recreate the estimated variance-covariance matrix
+  # using means, standard deviation and covariance matrix
   def constant_se
-   nil
+    return nil if @no_covariance
+    means=@x_mean
+    #means[@y_var]=@y_mean
+    means[:constant]=1
+    sd=@x_sd
+    #sd[@y_var]=@y_sd
+    sd[:constant]=0
+    fields=[:constant]+@matrix_cov.fields-[@y_var]
+    xt_x=Matrix.rows(fields.collect {|i|
+      fields.collect {|j|
+        if i==:constant or j==:constant
+          cov=0
+        elsif i==j
+          cov=sd[i]**2
+        else
+          cov=@matrix_cov.submatrix(i..i,j..j)[0,0]
+        end
+        cov*(@cases-1)+@cases*means[i]*means[j]
+      }
+    })
+    matrix=xt_x.inverse * mse
+    matrix.collect {|i| Math::sqrt(i) if i>0 }[0,0]
   end
   def to_reportbuilder(generator)

data/test/test_bivariate.rb CHANGED Viewed

@@ -25,47 +25,74 @@ class StatsampleBivariateTestCase < Test::Unit::TestCase
       end
     end
   end
+  def test_poly_vs_tetra
+    10.times {
+      # Should be the same results as Tetrachoric for 2x2 matrix
+      matrix=Matrix[[150+rand(10),1000+rand(20)],[1000+rand(20),200+rand(20)]]
+      tetra = Statsample::Bivariate::Tetrachoric.new_with_matrix(matrix)
+      poly  = Statsample::Bivariate::Polychoric.new(matrix)
+      poly.compute_two_step_mle_drasgow_ruby
+      assert_in_delta(tetra.r,poly.r,0.0001)
+      if HAS_GSL
+        poly.compute_two_step_mle_drasgow_gsl
+        assert_in_delta(tetra.r,poly.r,0.0001)
+      end
+    }
+  end
   def test_polychoric
-    # Should be the same results as Tetrachoric for 2x2 matrix
-    matrix=Matrix[[rand(100)+10,rand(100)+10],[rand(100)+10,rand(100)+10]]
-    tetra = Statsample::Bivariate::Tetrachoric.new_with_matrix(matrix)
-    poly  = Statsample::Bivariate::Polychoric.new(matrix)
-    assert_in_delta(tetra.r,poly.r,0.0001)
-    # Example for Tallis(1962, cited by Drasgow, 2006)
-    matrix=Matrix[[58,52,1],[26,58,3],[8,12,9]]
-    poly=Statsample::Bivariate::Polychoric.new(matrix)
-    poly.method=:two_step
-    poly.compute
-    assert_in_delta(0.420, poly.r, 0.001)
-    assert_in_delta(-0.240, poly.threshold_y[0],0.001)
-    assert_in_delta(-0.027, poly.threshold_x[0],0.001)
-    assert_in_delta(1.578, poly.threshold_y[1],0.001)
-    assert_in_delta(1.137, poly.threshold_x[1],0.001)
-    poly.method=:polychoric_series
-    poly.compute
-    assert_in_delta(0.556, poly.r, 0.001)
-    assert_in_delta(-0.240, poly.threshold_y[0],0.001)
-    assert_in_delta(-0.027, poly.threshold_x[0],0.001)
-    assert_in_delta(1.578, poly.threshold_y[1],0.001)
-    assert_in_delta(1.137, poly.threshold_x[1],0.001)
-    poly.method=:joint
-    poly.compute
-    assert_in_delta(0.4192, poly.r, 0.0001)
-    assert_in_delta(-0.2421, poly.threshold_y[0],0.0001)
-    assert_in_delta(-0.0297, poly.threshold_x[0],0.0001)
-    assert_in_delta(1.5938, poly.threshold_y[1],0.0001)
-    assert_in_delta(1.1331, poly.threshold_x[1],0.0001)
+      matrix=Matrix[[58,52,1],[26,58,3],[8,12,9]]
+      poly=Statsample::Bivariate::Polychoric.new(matrix)
+      poly.compute_two_step_mle_drasgow_ruby
+      assert_in_delta(0.420, poly.r, 0.001)
+      assert_in_delta(-0.240, poly.threshold_y[0],0.001)
+      assert_in_delta(-0.027, poly.threshold_x[0],0.001)
+      assert_in_delta(1.578, poly.threshold_y[1],0.001)
+      assert_in_delta(1.137, poly.threshold_x[1],0.001)
+    if HAS_GSL
+      poly.method=:polychoric_series
+      poly.compute
+      assert_in_delta(0.556, poly.r, 0.001)
+      assert_in_delta(-0.240, poly.threshold_y[0],0.001)
+      assert_in_delta(-0.027, poly.threshold_x[0],0.001)
+      assert_in_delta(1.578, poly.threshold_y[1],0.001)
+      assert_in_delta(1.137, poly.threshold_x[1],0.001)
+      # Example for Tallis(1962, cited by Drasgow, 2006)
+      matrix=Matrix[[58,52,1],[26,58,3],[8,12,9]]
+      poly=Statsample::Bivariate::Polychoric.new(matrix)
+      poly.compute_two_step_mle_drasgow_gsl
+      assert_in_delta(0.420, poly.r, 0.001)
+      assert_in_delta(-0.240, poly.threshold_y[0],0.001)
+      assert_in_delta(-0.027, poly.threshold_x[0],0.001)
+      assert_in_delta(1.578, poly.threshold_y[1],0.001)
+      assert_in_delta(1.137, poly.threshold_x[1],0.001)
+      poly.method=:joint
+      poly.compute
+      assert_in_delta(0.4192, poly.r, 0.0001)
+      assert_in_delta(-0.2421, poly.threshold_y[0],0.0001)
+      assert_in_delta(-0.0297, poly.threshold_x[0],0.0001)
+      assert_in_delta(1.5938, poly.threshold_y[1],0.0001)
+      assert_in_delta(1.1331, poly.threshold_x[1],0.0001)
+    else
+      puts "Two-step optimized, polychoric series and Joint method for Polychoric  requires GSL"
+    end
   end
   def test_tetrachoric
     a,b,c,d=0,0,0,0

data/test/test_csv.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 $:.unshift(File.dirname(__FILE__)+'/../lib/')
 require 'statsample'
-require 'tmpdir'
+require "tempfile"
 require 'test/unit'
 class StatsampleCSVTestCase < Test::Unit::TestCase
@@ -33,9 +33,10 @@ class StatsampleCSVTestCase < Test::Unit::TestCase
       assert_equal(age,ds['age_2'])
     end
     def test_write
-        filename=Dir::tmpdir+"/test_write.csv"
-        Statsample::CSV.write(@ds,filename)
-        ds2=Statsample::CSV.read(filename)
+      filename=Tempfile.new("afile")
+      #  filename=Dir::tmpdir+"/test_write.csv"
+        Statsample::CSV.write(@ds, filename.path)
+        ds2=Statsample::CSV.read(filename.path)
         i=0
         ds2.each_array{|row|
             assert_equal(@ds.case_as_array(i),row)

data/test/test_dataset.rb CHANGED Viewed

@@ -1,7 +1,7 @@
 $:.unshift(File.dirname(__FILE__)+'/../lib/')
 require 'statsample'
 require 'test/unit'
-require 'tmpdir'
+require 'tempfile'
 class StatsampleDatasetTestCase < Test::Unit::TestCase
   def setup
     @ds=Statsample::Dataset.new({'id' => Statsample::Vector.new([1,2,3,4,5]), 'name'=>Statsample::Vector.new(%w{Alex Claude Peter Franz George}), 'age'=>Statsample::Vector.new([20,23,25,27,5]),
@@ -13,9 +13,9 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
     assert_equal(%w{id name age city a1}, @ds.fields)
   end
   def test_saveload
-  outfile=Dir::tmpdir+"/dataset.ds"
-  @ds.save(outfile)
-  a=Statsample.load(outfile)
+    outfile=Tempfile.new("/dataset.ds")
+  @ds.save(outfile.path)
+  a=Statsample.load(outfile.path)
   assert_equal(@ds,a)
   end

data/test/test_factor.rb CHANGED Viewed

@@ -3,35 +3,43 @@ require 'statsample'
 require 'test/unit'
 class StatsampleFactorTestCase < Test::Unit::TestCase
     def test_pca
-      require 'gsl'
-      a=[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_scale
-      b=[2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9].to_scale
-      a.recode! {|c| c-a.mean}
-      b.recode! {|c| c-b.mean}
-      ds={'a'=>a,'b'=>b}.to_dataset
-      cov_matrix=Statsample::Bivariate.covariance_matrix(ds)
-      pca=Statsample::Factor::PCA.new(cov_matrix)
-      expected_eigenvalues=[1.284, 0.0490]
-      expected_eigenvalues.each_with_index{|ev,i|
-        assert_in_delta(ev,pca.eigenvalues[i],0.001)
-      }
-      expected_fm_1=GSL::Matrix[[0.677], [0.735]]
-      expected_fm_2=GSL::Matrix[[0.677,0.735], [0.735, -0.677]]
-      _test_matrix(expected_fm_1,pca.feature_vector(1))
-      _test_matrix(expected_fm_2,pca.feature_vector(2))
+      if HAS_GSL
+        require 'gsl'
+        a=[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_scale
+        b=[2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9].to_scale
+        a.recode! {|c| c-a.mean}
+        b.recode! {|c| c-b.mean}
+        ds={'a'=>a,'b'=>b}.to_dataset
+        cov_matrix=Statsample::Bivariate.covariance_matrix(ds)
+        pca=Statsample::Factor::PCA.new(cov_matrix)
+        expected_eigenvalues=[1.284, 0.0490]
+        expected_eigenvalues.each_with_index{|ev,i|
+          assert_in_delta(ev,pca.eigenvalues[i],0.001)
+        }
+        expected_fm_1=GSL::Matrix[[0.677], [0.735]]
+        expected_fm_2=GSL::Matrix[[0.677,0.735], [0.735, -0.677]]
+        _test_matrix(expected_fm_1,pca.feature_vector(1))
+        _test_matrix(expected_fm_2,pca.feature_vector(2))
+      else
+        puts "PCA not tested. Requires GSL"
+      end
     end
     def test_rotation_varimax
-      a = Matrix[ [ 0.4320,  0.8129,  0.3872]  ,
-       [0.7950, -0.5416,  0.2565]  ,
-       [0.5944,  0.7234, -0.3441],
-       [0.8945, -0.3921, -0.1863] ]
-       expected= Matrix[[-0.0204423,     0.938674,    -0.340334],
-       [0.983662, 0.0730206, 0.134997],
-       [0.0826106, 0.435975, -0.893379],
-       [0.939901, -0.0965213, -0.309596]].to_gsl
-       varimax=Statsample::Factor::Varimax.new(a)
-       varimax.iterate
-       _test_matrix(expected,varimax.rotated)
+      if HAS_GSL
+        a = Matrix[ [ 0.4320,  0.8129,  0.3872]  ,
+         [0.7950, -0.5416,  0.2565]  ,
+         [0.5944,  0.7234, -0.3441],
+         [0.8945, -0.3921, -0.1863] ]
+         expected= Matrix[[-0.0204423,     0.938674,    -0.340334],
+         [0.983662, 0.0730206, 0.134997],
+         [0.0826106, 0.435975, -0.893379],
+         [0.939901, -0.0965213, -0.309596]].to_gsl
+         varimax=Statsample::Factor::Varimax.new(a)
+         varimax.iterate
+         _test_matrix(expected,varimax.rotated)
+       else
+         puts "Rotation not tested. Requires GSL"
+       end
     end
     def _test_matrix(a,b)
       a.size1.times {|i|

data/test/test_matrix.rb CHANGED Viewed

@@ -19,7 +19,6 @@ class StatsampleMatrixTestCase < Test::Unit::TestCase
       assert_equal(6,matrix.row_sum[0])
       assert_equal(12,matrix.column_sum[0])
       assert_equal(45,matrix.total_sum)
-      m=matrix.to_gsl
     end
     def test_covariate
       a=Matrix[[1.0, 0.3, 0.2], [0.3, 1.0, 0.5], [0.2, 0.5, 1.0]]

data/test/test_regression.rb CHANGED Viewed

@@ -158,12 +158,14 @@ class StatsampleRegressionTestCase < Test::Unit::TestCase
     cor=Statsample::Bivariate.correlation_matrix(ds)
     lr=Statsample::Regression::Multiple::MatrixEngine.new(cor,'y', :y_mean=>@y.mean, :x_mean=>{'a'=>ds['a'].mean, 'b'=>ds['b'].mean, 'c'=>ds['c'].mean}, :cases=>@a.size, :y_sd=>@y.sd , :x_sd=>{'a' => @a.sd, 'b' => @b.sd, 'c' => @c.sd})
+    assert_nil(lr.constant_se)
+    assert_nil(lr.constant_t)
     model_test_matrix(lr, "correlation matrix")
     covariance=Statsample::Bivariate.covariance_matrix(ds)
     lr=Statsample::Regression::Multiple::MatrixEngine.new(covariance,'y', :y_mean=>@y.mean, :x_mean=>{'a'=>ds['a'].mean, 'b'=>ds['b'].mean, 'c'=>ds['c'].mean}, :cases=>@a.size)
-    model_test_matrix(lr , "covariance matrix")
+    model_test(lr , "covariance matrix")
   end
   def test_regression_rubyengine
     @a=[nil,1,3,2,4,3,5,4,6,5,7].to_vector(:scale)

data/test/test_svg_graph.rb CHANGED Viewed

@@ -2,6 +2,7 @@ $:.unshift(File.dirname(__FILE__)+'/../lib/')
 require 'statsample'
 require 'tmpdir'
 require 'tempfile'
+require 'tempfile'
 require 'fileutils'
 require 'test/unit'
 begin
@@ -19,12 +20,10 @@ class StatsampleSvgGraphTestCase < Test::Unit::TestCase
     rand(10)
     }.to_vector(:scale)
       h=ar.histogram([0,2,5,11])
-      file=@image_path+"/svg_histogram_only.svg"
+      file=Tempfile.new("svg_histogram_only.svg")
       graph = Statsample::Graph::SvgHistogram.new({})
       graph.histogram=h
-      File.open(file,"w") {|f|
-            f.puts(graph.burn)
-      }
+      file.puts(graph.burn)
     else
     puts "Statsample::Graph::SvgHistogram.new not tested (no ruby-gsl)"
     end
@@ -36,20 +35,20 @@ class StatsampleSvgGraphTestCase < Test::Unit::TestCase
       ar.push(rand(10))
     }
     vector=ar.to_vector
-    file=@image_path+"/svggraph_default.svg"
+    file=Tempfile.new("svggraph_default.svg").path
     vector.svggraph_frequencies(file)
-    file=@image_path+"/svggraph_Bar.svg"
+    file=Tempfile.new("svggraph_bar.svg").path
     vector.svggraph_frequencies(file,800,600,SVG::Graph::Bar,:graph_title=>'Bar')
     assert(File.exists?(file))
-    file=@image_path+"/svggraph_BarHorizontal.svg"
+    file=Tempfile.new("svggraph_bar_horizontal.svg").path
     vector.svggraph_frequencies(file,800,600,SVG::Graph::BarHorizontalNoOp,:graph_title=>'Horizontal Bar')
     assert(File.exists?(file))
-    file=@image_path+"/svggraph_Pie.svg"
+    file=Tempfile.new("svggraph_pie.svg").path
     vector.svggraph_frequencies(file,800,600,SVG::Graph::PieNoOp,:graph_title=>'Pie')
     assert(File.exists?(file))
     vector.type=:scale
     if HAS_GSL
-      file=@image_path+"/svggraph_histogram.svg"
+    file=Tempfile.new("svg_histogram.svg").path
       hist=vector.svggraph_histogram(5)
       File.open(file,"wb") {|fp|
               fp.write(hist.burn)

data/test/test_vector.rb CHANGED Viewed

@@ -1,7 +1,7 @@
 $:.unshift(File.dirname(__FILE__)+'/../lib/')
 require 'statsample'
 require 'test/unit'
-require 'tmpdir'
+require 'tempfile'
 class TestStatsample
 end
 class TestStatsample::TestVector < Test::Unit::TestCase
@@ -12,9 +12,9 @@ class TestStatsample::TestVector < Test::Unit::TestCase
     end
     def test_save_load
-        outfile=Dir::tmpdir+"/vector.vec"
-        @c.save(outfile)
-        a=Statsample.load(outfile)
+        outfile=Tempfile.new("vector.vec")
+        @c.save(outfile.path)
+        a=Statsample.load(outfile.path)
         assert_equal(@c,a)
     end

data/test/test_xls.rb CHANGED Viewed

@@ -1,7 +1,7 @@
 $:.unshift(File.dirname(__FILE__)+'/../lib/')
 require 'statsample'
 require 'test/unit'
-require 'tmpdir'
+require 'tempfile'
 begin
 	require 'spreadsheet'
 rescue LoadError
@@ -30,9 +30,9 @@ class StatsampleExcelTestCase < Test::Unit::TestCase
         assert_equal(nil,@ds['age'][5])
     end
     def test_write
-        filename=Dir::tmpdir+"/test_write.xls"
-        Statsample::Excel.write(@ds,filename)
-        ds2=Statsample::Excel.read(filename)
+      tempfile=Tempfile.new("test_write.xls")
+        Statsample::Excel.write(@ds,tempfile.path)
+        ds2=Statsample::Excel.read(tempfile.path)
         i=0
         ds2.each_array{|row|
             assert_equal(@ds.case_as_array(i),row)

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: statsample
 version: !ruby/object:Gem::Version
-  version: 0.6.4
+  version: 0.6.5
 platform: ruby
 authors:
 - Claudio Bustos
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2010-02-19 00:00:00 -03:00
+date: 2010-02-24 00:00:00 -03:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -43,24 +43,14 @@ dependencies:
         version: 0.2.0
     version:
 - !ruby/object:Gem::Dependency
-  name: rubyforge
-  type: :development
-  version_requirement:
-  version_requirements: !ruby/object:Gem::Requirement
-    requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        version: 2.0.3
-    version:
-- !ruby/object:Gem::Dependency
-  name: gemcutter
-  type: :development
+  name: minimization
+  type: :runtime
   version_requirement:
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 0.3.0
+        version: 0.1.0
     version:
 - !ruby/object:Gem::Dependency
   name: hoe
@@ -70,9 +60,19 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 2.5.0
+        version: 2.4.0
     version:
-description: ""
+description: |-
+  A suite for basic and advanced statistics on Ruby. Tested on Ruby 1.8.7, Ruby 1.9 and JRuby 1.4 (Ruby 1.8.7 compatible)
+  Includes:
+  * Descriptive statistics: frequencies, median, mean, standard error, skew, kurtosis (and many others).
+  * Imports and exports datasets from and to Excel, CSV and plain text files.
+  * Correlations: Pearson (r), Rho, Tetrachoric, Polychoric
+  * Regression: Simple, Multiple, Probit and Logit
+  * Factorial Analysis: Extraction (PCA and Principal Axis) and Rotation (Varimax and relatives)
+  * Dominance Analysis, with multivariate dependent and bootstrap (Azen & Budescu)
+  * Sample calculation related formulas
 email:
 - clbustos@gmail.com
 executables:
@@ -221,7 +221,7 @@ rubyforge_project: ruby-statsample
 rubygems_version: 1.3.5
 signing_key:
 specification_version: 3
-summary: ""
+summary: A suite for basic and advanced statistics on Ruby
 test_files:
 - test/test_bivariate.rb
 - test/test_dominance_analysis.rb