RubyGems - statsample - Versions diffs - 0.6.4 → 0.6.5 - Mend

statsample 0.6.4 → 0.6.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

data/History.txt +9 -0
data/README.txt +8 -4
data/demo/polychoric.rb +1 -2
data/lib/distribution/normalbivariate.rb +5 -5
data/lib/statsample.rb +1 -1
data/lib/statsample/bivariate/polychoric.rb +78 -1
data/lib/statsample/converters.rb +1 -1
data/lib/statsample/dominanceanalysis.rb +3 -2
data/lib/statsample/regression/multiple/baseengine.rb +1 -1
data/lib/statsample/regression/multiple/matrixengine.rb +58 -16
data/test/test_bivariate.rb +65 -38
data/test/test_csv.rb +5 -4
data/test/test_dataset.rb +4 -4
data/test/test_factor.rb +35 -27
data/test/test_matrix.rb +0 -1
data/test/test_regression.rb +4 -2
data/test/test_svg_graph.rb +8 -9
data/test/test_vector.rb +4 -4
data/test/test_xls.rb +4 -4
metadata +18 -18

data/History.txt CHANGED Viewed

@@ -1,3 +1,12 @@
+=== 0.6.5 / 2010-02-24
+* Bug fix on test: Use tempfile instead of tempdir
+* Multiple Regression: Calculation of constant standard error, using covariance matrix.
+* Calculation of R^2_yx and P^2_yx for Regression on multiple dependent variables
+* Dominance Analysis can use a Correlation or Covariance Matrix as input.
+* Dominance Analysis extension to multiple dependent variables (Azen & Budescu, 2006)
+* Two-step estimate of Polychoric correlation uses the minimization gem, so it can be executed without rb-gsl
 === 0.6.4 / 2010-02-19
 * Dominance Analysis and Dominance Analysis Bootstrap allows multivariate dependent analysis.
 * Test suite for Dominance Analysis, using Azen and Budescu papers as references

data/README.txt CHANGED Viewed

@@ -3,9 +3,11 @@
 http://ruby-statsample.rubyforge.org/
-== FEATURES:
+== DESCRIPTION:
+A suite for basic and advanced statistics on Ruby. Tested on Ruby 1.8.7, Ruby 1.9 and JRuby 1.4 (Ruby 1.8.7 compatible)
-A suite for basic and advanced statistics. Includes:
+Includes:
 * Descriptive statistics: frequencies, median, mean, standard error, skew, kurtosis (and many others).
 * Imports and exports datasets from and to Excel, CSV and plain text files.
 * Correlations: Pearson (r), Rho, Tetrachoric, Polychoric
@@ -14,7 +16,7 @@ A suite for basic and advanced statistics. Includes:
 * Dominance Analysis, with multivariate dependent and bootstrap (Azen & Budescu)
 * Sample calculation related formulas
-== DETAILED FEATURES:
+== FEATURES:
 * Factorial Analysis. Principal Component Analysis and Principal Axis extraction, with orthogonal rotations (Varimax, Equimax, Quartimax)
 * Multiple Regression. Listwise analysis optimized with use of Alglib library. Pairwise analysis is executed on pure ruby with matrixes and reports same values as SPSS
@@ -66,7 +68,9 @@ A suite for basic and advanced statistics. Includes:
 Optional:
 * Plotting: gnuplot and rbgnuplot, SVG::Graph
-* Factorial analysis and polychorical correlation: gsl library and rb-gsl (http://rb-gsl.rubyforge.org/). You should install it using <tt>gem install gsl</tt>
+* Factorial analysis and polychoric correlation (joint estimate and polychoric series): gsl library and rb-gsl (http://rb-gsl.rubyforge.org/). You should install it using <tt>gem install gsl</tt>.
+<b>Note</b>: Use gsl 1.12.109 or later.
 == DOWNLOAD
 * Gems and bugs report: http://rubyforge.org/projects/ruby-statsample/

data/demo/polychoric.rb CHANGED Viewed

@@ -5,10 +5,9 @@ require 'statsample'
 ct=Matrix[[58,52,1],[26,58,3],[8,12,9]]
 # Estimation of polychoric correlation using two-step (default)
-poly=Statsample::Bivariate::Polychoric.new(ct, :name=>"Polychoric with two-step")
+poly=Statsample::Bivariate::Polychoric.new(ct, :name=>"Polychoric with two-step", :debug=>true)
 puts poly.summary
 # Estimation of polychoric correlation using joint method (slow)
 poly=Statsample::Bivariate::Polychoric.new(ct, :method=>:joint, :name=>"Polychoric with joint")
 puts poly.summary

data/lib/distribution/normalbivariate.rb CHANGED Viewed

@@ -19,10 +19,9 @@ module Distribution
       # Probability density function for a given x, y and rho value.
       #
       # Source: http://en.wikipedia.org/wiki/Multivariate_normal_distribution
-      def pdf(x,y, rho, sigma1=1.0, sigma2=1.0)
-        (1.quo(2 * Math::PI * sigma1*sigma2 * Math::sqrt( 1 - rho**2 ))) *
-          Math::exp(-(1.quo(2*(1-rho**2))) *
-          ((x**2/sigma1) + (y**2/sigma2) - (2*rho*x*y).quo(sigma1*sigma2)  ))
+      def pdf(x,y, rho, s1=1.0, s2=1.0)
+        1.quo(2 * Math::PI * s1 * s2 * Math::sqrt( 1 - rho**2 )) * (Math::exp(-(1.quo(2*(1-rho**2))) *
+          ((x**2.quo(s1)) + (y**2.quo(s2)) - (2*rho*x*y).quo(s1*s2))))
       end
       def f(x,y,aprime,bprime,rho)
@@ -147,7 +146,8 @@ module Distribution
         r=rho
         twopi = 6.283185307179586
-        w=11.times.collect {[nil]*4}; x=11.times.collect {[nil]*4}
+        w=11.times.collect {[nil]*4};
+        x=11.times.collect {[nil]*4}
         data=[
         0.1713244923791705E+00, -0.9324695142031522E+00,

data/lib/statsample.rb CHANGED Viewed

@@ -109,7 +109,7 @@ end
 # * Dataset: An union of vectors.
 #
 module Statsample
-  VERSION = '0.6.4'
+  VERSION = '0.6.5'
   SPLIT_TOKEN = ","
   autoload(:Database, 'statsample/converters')
   autoload(:Anova, 'statsample/anova')

data/lib/statsample/bivariate/polychoric.rb CHANGED Viewed

@@ -1,3 +1,4 @@
+require 'minimization'
 module Statsample
   module Bivariate
     # Calculate Polychoric correlation for two vectors.
@@ -79,7 +80,7 @@ module Statsample
       METHOD=:two_step
       MAX_ITERATIONS=300
-      EPSILON=0.000001
+      EPSILON=1e-6
       MINIMIZER_TYPE_TWO_STEP="brent"
       MINIMIZER_TYPE_JOINT="nmsimplex"
       def new_with_vectors(v1,v2)
@@ -184,6 +185,52 @@ module Statsample
       def chi_square_df
         (@nr*@nc)-@nc-@nr
       end
+      def loglike_fd_rho(alpha,beta,rho)
+        if rho.abs>0.9999
+          rho= (rho>0) ? 0.9999 : -0.9999
+        end
+         #puts "rho: #{rho}"
+        loglike=0
+        pd=@nr.times.collect{ [0]*@nc}
+        pc=@nr.times.collect{ [0]*@nc}
+        @nr.times { |i|
+          @nc.times { |j|
+            if i==@nr-1 and j==@nc-1
+              pd[i][j]=1.0
+              a=100
+              b=100
+            else
+              a=(i==@nr-1) ? 100: alpha[i]
+              b=(j==@nc-1) ? 100: beta[j]
+              pd[i][j]=Distribution::NormalBivariate.cdf(a, b, rho)
+            end
+            pc[i][j] = pd[i][j]
+            pd[i][j] = pd[i][j] - pc[i-1][j] if i>0
+            pd[i][j] = pd[i][j] - pc[i][j-1] if j>0
+            pd[i][j] = pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
+            pij= pd[i][j]+EPSILON
+            if i==0
+              alpha_m1=-10
+            else
+              alpha_m1=alpha[i-1]
+            end
+            if j==0
+              beta_m1=-10
+            else
+              beta_m1=beta[j-1]
+            end
+            loglike+= (@matrix[i,j].quo(pij))*(Distribution::NormalBivariate.pdf(a,b,rho) - Distribution::NormalBivariate.pdf(alpha_m1, b,rho) - Distribution::NormalBivariate.pdf(a, beta_m1,rho) + Distribution::NormalBivariate.pdf(alpha_m1, beta_m1,rho) )
+          }
+        }
+        #puts "derivative: #{loglike}"
+        -loglike
+      end
       def loglike(alpha,beta,rho)
         if rho.abs>0.9999
           rho= (rho>0) ? 0.9999 : -0.9999
@@ -249,6 +296,8 @@ module Statsample
           ac=@sumcac[i]
         end
       end
       # Computation of polychoric correlation using two-step ML estimation.
       #
       # Two-step ML estimation "first estimates the thresholds from the one-way marginal frequencies, then estimates rho, conditional on these thresholds, via maximum likelihood" (Uebersax, 2006).
@@ -260,6 +309,34 @@ module Statsample
       # * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February, 11, 2010
       #
       def compute_two_step_mle_drasgow
+        if HAS_GSL
+          compute_two_step_mle_drasgow_gsl
+        else
+          compute_two_step_mle_drasgow_ruby
+        end
+      end
+      # Depends on minimization algorithm.
+      def compute_two_step_mle_drasgow_ruby #:nodoc:
+        f=proc {|rho|
+          loglike(@alpha,@beta, rho)
+        }
+        @log="Minimizing using GSL Brent method\n"
+        min=Minimization::Brent.new(-0.9999,0.9999,f)
+        min.epsilon=@epsilon
+        min.expected=0
+        min.iterate
+        @log+=min.log
+        @r=min.x_minimum
+        @loglike_model=-min.f_minimum
+        puts @log if @debug
+      end
+      def compute_two_step_mle_drasgow_gsl #:nodoc:
         fn1=GSL::Function.alloc {|rho|
           loglike(@alpha,@beta, rho)

data/lib/statsample/converters.rb CHANGED Viewed

@@ -86,7 +86,7 @@ module Statsample
           end
         }
 =end
-raise "Should'nt be empty headers: [#{row.to_a.join(",")}]" if row.to_a.find_all {|c| c.nil?}.count>0
+raise "Should'nt be empty headers: [#{row.to_a.join(",")}]" if row.to_a.find_all {|c| c.nil?}.size>0
         fields=row.to_a.collect{|c| c.downcase}
         fields.recode_repeated
       end

data/lib/statsample/dominanceanalysis.rb CHANGED Viewed

@@ -112,6 +112,7 @@ module Statsample
         @method_association=:r2
       end
+      @name=nil
       opts.each{|k,v|
         self.send("#{k}=",v) if self.respond_to? k
       }
@@ -133,7 +134,7 @@ module Statsample
         raise ArgumentError.new("You should use a Matrix or a Dataset")
       end
       @models=nil
+      @models_data=nil
     end
     # Compute models.
     def compute
@@ -227,7 +228,7 @@ module Statsample
             dominances.push(0)
         else
           return 0.5
-            dominances.push(0.5)
+            #dominances.push(0.5)
         end
       end
       final=dominances.uniq

data/lib/statsample/regression/multiple/baseengine.rb CHANGED Viewed

@@ -125,7 +125,7 @@ module Statsample
           out={}
           mse=sse.quo(df_e)
           coeffs.each {|k,v|
-            out[k]=Math::sqrt(mse/(@ds[k].sum_of_squares*tolerance(k)))
+            out[k]=Math::sqrt(mse/(@ds[k].sum_of_squares * tolerance(k)))
           }
           out
         end

data/lib/statsample/regression/multiple/matrixengine.rb CHANGED Viewed

@@ -35,22 +35,36 @@ class MatrixEngine < BaseEngine
   def initialize(matrix,y_var, opts=Hash.new)
     matrix.extend Statsample::CovariateMatrix
     raise "#{y_var} variable should be on data" unless matrix.fields.include? y_var
-    @matrix_cor=matrix.correlation
+    if matrix.type==:covariance
+      @matrix_cov=matrix
+      @matrix_cor=matrix.correlation
+      @no_covariance=false
+    else
+      @matrix_cor=matrix
+      @matrix_cov=matrix
+      @no_covariance=true
+    end
     @y_var=y_var
     @fields=matrix.fields-[y_var]
     @n_predictors=@fields.size
-    @matrix=matrix
-    @matrix_x= matrix.submatrix(@fields)
-    @matrix_y = matrix.submatrix(@fields, [y_var])
-    @matrix_y_cor=@matrix_cor.submatrix(@fields, [y_var])
-    @result_matrix=@matrix_x.inverse * @matrix_y
-    @y_sd=Math::sqrt(@matrix.submatrix([y_var])[0,0])
-    @x_sd=@matrix_x.row_size.times.inject({}) {|ac,i|
-      ac[@matrix_x.fields[i]]=Math::sqrt(@matrix_x[i,i])
+    @matrix_x= @matrix_cor.submatrix(@fields)
+    @matrix_x_cov= @matrix_cov.submatrix(@fields)
+    @matrix_y = @matrix_cor.submatrix(@fields, [y_var])
+    @matrix_y_cov = @matrix_cov.submatrix(@fields, [y_var])
+    @y_sd=Math::sqrt(@matrix_cov.submatrix([y_var])[0,0])
+    @x_sd=@n_predictors.times.inject({}) {|ac,i|
+      ac[@matrix_x_cov.fields[i]]=Math::sqrt(@matrix_x_cov[i,i])
       ac;
     }
     @cases=nil
     @x_mean=@fields.inject({}) {|ac,f|
       ac[f]=0.0
@@ -64,13 +78,15 @@ class MatrixEngine < BaseEngine
     opts.each{|k,v|
         self.send("#{k}=",v) if self.respond_to? k
     }
+      result_matrix=@matrix_x_cov.inverse * @matrix_y_cov
     if matrix.type==:covariance
-      @coeffs=@result_matrix.column(0).to_a
+      @coeffs=result_matrix.column(0).to_a
       @coeffs_stan=coeffs.collect {|k,v|
         coeffs[k]*@x_sd[k].quo(@y_sd)
       }
     else
-      @coeffs_stan=@result_matrix.column(0).to_a
+      @coeffs_stan=result_matrix.column(0).to_a
       @coeffs=standarized_coeffs.collect {|k,v|
         standarized_coeffs[k]*@y_sd.quo(@x_sd[k])
@@ -87,8 +103,7 @@ class MatrixEngine < BaseEngine
   # * 1-(|R| / |R_x|) or
   # * Sum(b_i*r_yi)
   def r2
-    @n_predictors.times.inject(0) {|ac,i| ac+@coeffs_stan[i]* @matrix_y_cor[i,0]}
-    #1-(@matrix.correlation.determinant.quo(@matrix_x.correlation.determinant))
+    @n_predictors.times.inject(0) {|ac,i| ac+@coeffs_stan[i]* @matrix_y[i,0]}
   end
   def r
     Math::sqrt(r2)
@@ -141,9 +156,36 @@ class MatrixEngine < BaseEngine
     }
     out
   end
-# Standard error for constant
+  def constant_t
+    return nil if constant_se.nil?
+    constant.to_f/constant_se
+  end
+  # Standard error for constant.
+  # Recreate the estimated variance-covariance matrix
+  # using means, standard deviation and covariance matrix
   def constant_se
-   nil
+    return nil if @no_covariance
+    means=@x_mean
+    #means[@y_var]=@y_mean
+    means[:constant]=1
+    sd=@x_sd
+    #sd[@y_var]=@y_sd
+    sd[:constant]=0
+    fields=[:constant]+@matrix_cov.fields-[@y_var]
+    xt_x=Matrix.rows(fields.collect {|i|
+      fields.collect {|j|
+        if i==:constant or j==:constant
+          cov=0
+        elsif i==j
+          cov=sd[i]**2
+        else
+          cov=@matrix_cov.submatrix(i..i,j..j)[0,0]
+        end
+        cov*(@cases-1)+@cases*means[i]*means[j]
+      }
+    })
+    matrix=xt_x.inverse * mse
+    matrix.collect {|i| Math::sqrt(i) if i>0 }[0,0]
   end
   def to_reportbuilder(generator)

data/test/test_bivariate.rb CHANGED Viewed

@@ -25,47 +25,74 @@ class StatsampleBivariateTestCase < Test::Unit::TestCase
       end
     end
   end
+  def test_poly_vs_tetra
+    10.times {
+      # Should be the same results as Tetrachoric for 2x2 matrix
+      matrix=Matrix[[150+rand(10),1000+rand(20)],[1000+rand(20),200+rand(20)]]
+      tetra = Statsample::Bivariate::Tetrachoric.new_with_matrix(matrix)
+      poly  = Statsample::Bivariate::Polychoric.new(matrix)
+      poly.compute_two_step_mle_drasgow_ruby
+      assert_in_delta(tetra.r,poly.r,0.0001)
+      if HAS_GSL
+        poly.compute_two_step_mle_drasgow_gsl
+        assert_in_delta(tetra.r,poly.r,0.0001)
+      end
+    }
+  end
   def test_polychoric
-    # Should be the same results as Tetrachoric for 2x2 matrix
-    matrix=Matrix[[rand(100)+10,rand(100)+10],[rand(100)+10,rand(100)+10]]
-    tetra = Statsample::Bivariate::Tetrachoric.new_with_matrix(matrix)
-    poly  = Statsample::Bivariate::Polychoric.new(matrix)
-    assert_in_delta(tetra.r,poly.r,0.0001)
-    # Example for Tallis(1962, cited by Drasgow, 2006)
-    matrix=Matrix[[58,52,1],[26,58,3],[8,12,9]]
-    poly=Statsample::Bivariate::Polychoric.new(matrix)
-    poly.method=:two_step
-    poly.compute
-    assert_in_delta(0.420, poly.r, 0.001)
-    assert_in_delta(-0.240, poly.threshold_y[0],0.001)
-    assert_in_delta(-0.027, poly.threshold_x[0],0.001)
-    assert_in_delta(1.578, poly.threshold_y[1],0.001)
-    assert_in_delta(1.137, poly.threshold_x[1],0.001)
-    poly.method=:polychoric_series
-    poly.compute
-    assert_in_delta(0.556, poly.r, 0.001)
-    assert_in_delta(-0.240, poly.threshold_y[0],0.001)
-    assert_in_delta(-0.027, poly.threshold_x[0],0.001)
-    assert_in_delta(1.578, poly.threshold_y[1],0.001)
-    assert_in_delta(1.137, poly.threshold_x[1],0.001)
-    poly.method=:joint
-    poly.compute
-    assert_in_delta(0.4192, poly.r, 0.0001)
-    assert_in_delta(-0.2421, poly.threshold_y[0],0.0001)
-    assert_in_delta(-0.0297, poly.threshold_x[0],0.0001)
-    assert_in_delta(1.5938, poly.threshold_y[1],0.0001)
-    assert_in_delta(1.1331, poly.threshold_x[1],0.0001)
+      matrix=Matrix[[58,52,1],[26,58,3],[8,12,9]]
+      poly=Statsample::Bivariate::Polychoric.new(matrix)
+      poly.compute_two_step_mle_drasgow_ruby
+      assert_in_delta(0.420, poly.r, 0.001)
+      assert_in_delta(-0.240, poly.threshold_y[0],0.001)
+      assert_in_delta(-0.027, poly.threshold_x[0],0.001)
+      assert_in_delta(1.578, poly.threshold_y[1],0.001)
+      assert_in_delta(1.137, poly.threshold_x[1],0.001)
+    if HAS_GSL
+      poly.method=:polychoric_series
+      poly.compute
+      assert_in_delta(0.556, poly.r, 0.001)
+      assert_in_delta(-0.240, poly.threshold_y[0],0.001)
+      assert_in_delta(-0.027, poly.threshold_x[0],0.001)
+      assert_in_delta(1.578, poly.threshold_y[1],0.001)
+      assert_in_delta(1.137, poly.threshold_x[1],0.001)
+      # Example for Tallis(1962, cited by Drasgow, 2006)
+      matrix=Matrix[[58,52,1],[26,58,3],[8,12,9]]
+      poly=Statsample::Bivariate::Polychoric.new(matrix)
+      poly.compute_two_step_mle_drasgow_gsl
+      assert_in_delta(0.420, poly.r, 0.001)
+      assert_in_delta(-0.240, poly.threshold_y[0],0.001)
+      assert_in_delta(-0.027, poly.threshold_x[0],0.001)
+      assert_in_delta(1.578, poly.threshold_y[1],0.001)
+      assert_in_delta(1.137, poly.threshold_x[1],0.001)
+      poly.method=:joint
+      poly.compute
+      assert_in_delta(0.4192, poly.r, 0.0001)
+      assert_in_delta(-0.2421, poly.threshold_y[0],0.0001)
+      assert_in_delta(-0.0297, poly.threshold_x[0],0.0001)
+      assert_in_delta(1.5938, poly.threshold_y[1],0.0001)
+      assert_in_delta(1.1331, poly.threshold_x[1],0.0001)
+    else
+      puts "Two-step optimized, polychoric series and Joint method for Polychoric  requires GSL"
+    end
   end
   def test_tetrachoric
     a,b,c,d=0,0,0,0

data/test/test_csv.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 $:.unshift(File.dirname(__FILE__)+'/../lib/')
 require 'statsample'
-require 'tmpdir'
+require "tempfile"
 require 'test/unit'
 class StatsampleCSVTestCase < Test::Unit::TestCase
@@ -33,9 +33,10 @@ class StatsampleCSVTestCase < Test::Unit::TestCase
       assert_equal(age,ds['age_2'])
     end
     def test_write
-        filename=Dir::tmpdir+"/test_write.csv"
-        Statsample::CSV.write(@ds,filename)
-        ds2=Statsample::CSV.read(filename)
+      filename=Tempfile.new("afile")
+      #  filename=Dir::tmpdir+"/test_write.csv"
+        Statsample::CSV.write(@ds, filename.path)
+        ds2=Statsample::CSV.read(filename.path)
         i=0
         ds2.each_array{|row|
             assert_equal(@ds.case_as_array(i),row)

data/test/test_dataset.rb CHANGED Viewed

@@ -1,7 +1,7 @@
 $:.unshift(File.dirname(__FILE__)+'/../lib/')
 require 'statsample'
 require 'test/unit'
-require 'tmpdir'
+require 'tempfile'
 class StatsampleDatasetTestCase < Test::Unit::TestCase
   def setup
     @ds=Statsample::Dataset.new({'id' => Statsample::Vector.new([1,2,3,4,5]), 'name'=>Statsample::Vector.new(%w{Alex Claude Peter Franz George}), 'age'=>Statsample::Vector.new([20,23,25,27,5]),
@@ -13,9 +13,9 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
     assert_equal(%w{id name age city a1}, @ds.fields)
   end
   def test_saveload
-  outfile=Dir::tmpdir+"/dataset.ds"
-  @ds.save(outfile)
-  a=Statsample.load(outfile)
+    outfile=Tempfile.new("/dataset.ds")
+  @ds.save(outfile.path)
+  a=Statsample.load(outfile.path)
   assert_equal(@ds,a)
   end

data/test/test_factor.rb CHANGED Viewed

@@ -3,35 +3,43 @@ require 'statsample'
 require 'test/unit'
 class StatsampleFactorTestCase < Test::Unit::TestCase
     def test_pca
-      require 'gsl'
-      a=[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_scale
-      b=[2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9].to_scale
-      a.recode! {|c| c-a.mean}
-      b.recode! {|c| c-b.mean}
-      ds={'a'=>a,'b'=>b}.to_dataset
-      cov_matrix=Statsample::Bivariate.covariance_matrix(ds)
-      pca=Statsample::Factor::PCA.new(cov_matrix)
-      expected_eigenvalues=[1.284, 0.0490]
-      expected_eigenvalues.each_with_index{|ev,i|
-        assert_in_delta(ev,pca.eigenvalues[i],0.001)
-      }
-      expected_fm_1=GSL::Matrix[[0.677], [0.735]]
-      expected_fm_2=GSL::Matrix[[0.677,0.735], [0.735, -0.677]]
-      _test_matrix(expected_fm_1,pca.feature_vector(1))
-      _test_matrix(expected_fm_2,pca.feature_vector(2))
+      if HAS_GSL
+        require 'gsl'
+        a=[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_scale
+        b=[2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9].to_scale
+        a.recode! {|c| c-a.mean}
+        b.recode! {|c| c-b.mean}
+        ds={'a'=>a,'b'=>b}.to_dataset
+        cov_matrix=Statsample::Bivariate.covariance_matrix(ds)
+        pca=Statsample::Factor::PCA.new(cov_matrix)
+        expected_eigenvalues=[1.284, 0.0490]
+        expected_eigenvalues.each_with_index{|ev,i|
+          assert_in_delta(ev,pca.eigenvalues[i],0.001)
+        }
+        expected_fm_1=GSL::Matrix[[0.677], [0.735]]
+        expected_fm_2=GSL::Matrix[[0.677,0.735], [0.735, -0.677]]
+        _test_matrix(expected_fm_1,pca.feature_vector(1))
+        _test_matrix(expected_fm_2,pca.feature_vector(2))
+      else
+        puts "PCA not tested. Requires GSL"
+      end
     end
     def test_rotation_varimax
-      a = Matrix[ [ 0.4320,  0.8129,  0.3872]  ,
-       [0.7950, -0.5416,  0.2565]  ,
-       [0.5944,  0.7234, -0.3441],
-       [0.8945, -0.3921, -0.1863] ]
-       expected= Matrix[[-0.0204423,     0.938674,    -0.340334],
-       [0.983662, 0.0730206, 0.134997],
-       [0.0826106, 0.435975, -0.893379],
-       [0.939901, -0.0965213, -0.309596]].to_gsl
-       varimax=Statsample::Factor::Varimax.new(a)
-       varimax.iterate
-       _test_matrix(expected,varimax.rotated)
+      if HAS_GSL
+        a = Matrix[ [ 0.4320,  0.8129,  0.3872]  ,
+         [0.7950, -0.5416,  0.2565]  ,
+         [0.5944,  0.7234, -0.3441],
+         [0.8945, -0.3921, -0.1863] ]
+         expected= Matrix[[-0.0204423,     0.938674,    -0.340334],
+         [0.983662, 0.0730206, 0.134997],
+         [0.0826106, 0.435975, -0.893379],
+         [0.939901, -0.0965213, -0.309596]].to_gsl
+         varimax=Statsample::Factor::Varimax.new(a)
+         varimax.iterate
+         _test_matrix(expected,varimax.rotated)
+       else
+         puts "Rotation not tested. Requires GSL"
+       end
     end
     def _test_matrix(a,b)
       a.size1.times {|i|

data/test/test_matrix.rb CHANGED Viewed

@@ -19,7 +19,6 @@ class StatsampleMatrixTestCase < Test::Unit::TestCase
       assert_equal(6,matrix.row_sum[0])
       assert_equal(12,matrix.column_sum[0])
       assert_equal(45,matrix.total_sum)
-      m=matrix.to_gsl
     end
     def test_covariate
       a=Matrix[[1.0, 0.3, 0.2], [0.3, 1.0, 0.5], [0.2, 0.5, 1.0]]

data/test/test_regression.rb CHANGED Viewed

@@ -158,12 +158,14 @@ class StatsampleRegressionTestCase < Test::Unit::TestCase
     cor=Statsample::Bivariate.correlation_matrix(ds)
     lr=Statsample::Regression::Multiple::MatrixEngine.new(cor,'y', :y_mean=>@y.mean, :x_mean=>{'a'=>ds['a'].mean, 'b'=>ds['b'].mean, 'c'=>ds['c'].mean}, :cases=>@a.size, :y_sd=>@y.sd , :x_sd=>{'a' => @a.sd, 'b' => @b.sd, 'c' => @c.sd})
+    assert_nil(lr.constant_se)
+    assert_nil(lr.constant_t)
     model_test_matrix(lr, "correlation matrix")
     covariance=Statsample::Bivariate.covariance_matrix(ds)
     lr=Statsample::Regression::Multiple::MatrixEngine.new(covariance,'y', :y_mean=>@y.mean, :x_mean=>{'a'=>ds['a'].mean, 'b'=>ds['b'].mean, 'c'=>ds['c'].mean}, :cases=>@a.size)
-    model_test_matrix(lr , "covariance matrix")
+    model_test(lr , "covariance matrix")
   end
   def test_regression_rubyengine
     @a=[nil,1,3,2,4,3,5,4,6,5,7].to_vector(:scale)

data/test/test_svg_graph.rb CHANGED Viewed

@@ -2,6 +2,7 @@ $:.unshift(File.dirname(__FILE__)+'/../lib/')
 require 'statsample'
 require 'tmpdir'
 require 'tempfile'
+require 'tempfile'
 require 'fileutils'
 require 'test/unit'
 begin
@@ -19,12 +20,10 @@ class StatsampleSvgGraphTestCase < Test::Unit::TestCase
     rand(10)
     }.to_vector(:scale)
       h=ar.histogram([0,2,5,11])
-      file=@image_path+"/svg_histogram_only.svg"
+      file=Tempfile.new("svg_histogram_only.svg")
       graph = Statsample::Graph::SvgHistogram.new({})
       graph.histogram=h
-      File.open(file,"w") {|f|
-            f.puts(graph.burn)
-      }
+      file.puts(graph.burn)
     else
     puts "Statsample::Graph::SvgHistogram.new not tested (no ruby-gsl)"
     end
@@ -36,20 +35,20 @@ class StatsampleSvgGraphTestCase < Test::Unit::TestCase
       ar.push(rand(10))
     }
     vector=ar.to_vector
-    file=@image_path+"/svggraph_default.svg"
+    file=Tempfile.new("svggraph_default.svg").path
     vector.svggraph_frequencies(file)
-    file=@image_path+"/svggraph_Bar.svg"
+    file=Tempfile.new("svggraph_bar.svg").path
     vector.svggraph_frequencies(file,800,600,SVG::Graph::Bar,:graph_title=>'Bar')
     assert(File.exists?(file))
-    file=@image_path+"/svggraph_BarHorizontal.svg"
+    file=Tempfile.new("svggraph_bar_horizontal.svg").path
     vector.svggraph_frequencies(file,800,600,SVG::Graph::BarHorizontalNoOp,:graph_title=>'Horizontal Bar')
     assert(File.exists?(file))
-    file=@image_path+"/svggraph_Pie.svg"
+    file=Tempfile.new("svggraph_pie.svg").path
     vector.svggraph_frequencies(file,800,600,SVG::Graph::PieNoOp,:graph_title=>'Pie')
     assert(File.exists?(file))
     vector.type=:scale
     if HAS_GSL
-      file=@image_path+"/svggraph_histogram.svg"
+    file=Tempfile.new("svg_histogram.svg").path
       hist=vector.svggraph_histogram(5)
       File.open(file,"wb") {|fp|
               fp.write(hist.burn)

data/test/test_vector.rb CHANGED Viewed

@@ -1,7 +1,7 @@
 $:.unshift(File.dirname(__FILE__)+'/../lib/')
 require 'statsample'
 require 'test/unit'
-require 'tmpdir'
+require 'tempfile'
 class TestStatsample
 end
 class TestStatsample::TestVector < Test::Unit::TestCase
@@ -12,9 +12,9 @@ class TestStatsample::TestVector < Test::Unit::TestCase
     end
     def test_save_load
-        outfile=Dir::tmpdir+"/vector.vec"
-        @c.save(outfile)
-        a=Statsample.load(outfile)
+        outfile=Tempfile.new("vector.vec")
+        @c.save(outfile.path)
+        a=Statsample.load(outfile.path)
         assert_equal(@c,a)
     end

data/test/test_xls.rb CHANGED Viewed

@@ -1,7 +1,7 @@
 $:.unshift(File.dirname(__FILE__)+'/../lib/')
 require 'statsample'
 require 'test/unit'
-require 'tmpdir'
+require 'tempfile'
 begin
 	require 'spreadsheet'
 rescue LoadError
@@ -30,9 +30,9 @@ class StatsampleExcelTestCase < Test::Unit::TestCase
         assert_equal(nil,@ds['age'][5])
     end
     def test_write
-        filename=Dir::tmpdir+"/test_write.xls"
-        Statsample::Excel.write(@ds,filename)
-        ds2=Statsample::Excel.read(filename)
+      tempfile=Tempfile.new("test_write.xls")
+        Statsample::Excel.write(@ds,tempfile.path)
+        ds2=Statsample::Excel.read(tempfile.path)
         i=0
         ds2.each_array{|row|
             assert_equal(@ds.case_as_array(i),row)

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: statsample
 version: !ruby/object:Gem::Version
-  version: 0.6.4
+  version: 0.6.5
 platform: ruby
 authors:
 - Claudio Bustos
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2010-02-19 00:00:00 -03:00
+date: 2010-02-24 00:00:00 -03:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -43,24 +43,14 @@ dependencies:
         version: 0.2.0
     version:
 - !ruby/object:Gem::Dependency
-  name: rubyforge
-  type: :development
-  version_requirement:
-  version_requirements: !ruby/object:Gem::Requirement
-    requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        version: 2.0.3
-    version:
-- !ruby/object:Gem::Dependency
-  name: gemcutter
-  type: :development
+  name: minimization
+  type: :runtime
   version_requirement:
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 0.3.0
+        version: 0.1.0
     version:
 - !ruby/object:Gem::Dependency
   name: hoe
@@ -70,9 +60,19 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 2.5.0
+        version: 2.4.0
     version:
-description: ""
+description: |-
+  A suite for basic and advanced statistics on Ruby. Tested on Ruby 1.8.7, Ruby 1.9 and JRuby 1.4 (Ruby 1.8.7 compatible)
+  Includes:
+  * Descriptive statistics: frequencies, median, mean, standard error, skew, kurtosis (and many others).
+  * Imports and exports datasets from and to Excel, CSV and plain text files.
+  * Correlations: Pearson (r), Rho, Tetrachoric, Polychoric
+  * Regression: Simple, Multiple, Probit and Logit
+  * Factorial Analysis: Extraction (PCA and Principal Axis) and Rotation (Varimax and relatives)
+  * Dominance Analysis, with multivariate dependent and bootstrap (Azen & Budescu)
+  * Sample calculation related formulas
 email:
 - clbustos@gmail.com
 executables:
@@ -221,7 +221,7 @@ rubyforge_project: ruby-statsample
 rubygems_version: 1.3.5
 signing_key:
 specification_version: 3
-summary: ""
+summary: A suite for basic and advanced statistics on Ruby
 test_files:
 - test/test_bivariate.rb
 - test/test_dominance_analysis.rb