RubyGems - statsample-bivariate-extension - Versions diffs - 0.13.2 → 0.13.3 - Mend

statsample-bivariate-extension 0.13.2 → 0.13.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

data.tar.gz.sig +0 -0
data/History.txt +7 -0
data/Manifest.txt +3 -0
data/Rakefile +10 -0
data/lib/statsample/bivariate/extension_version.rb +1 -1
data/lib/statsample/bivariate/polychoric.rb +118 -200
data/lib/statsample/bivariate/polychoric/processor.rb +261 -0
data/spec/spec.opts +3 -0
data/spec/statsample/bivariate/polychoric_processor_spec.rb +47 -0
data/spec/statsample/bivariate/polychoric_spec.rb +38 -30
metadata +6 -3
metadata.gz.sig +0 -0

data.tar.gz.sig CHANGED

Binary file

data/History.txt CHANGED

@@ -1,3 +1,10 @@
+=== 0.13.3 / 2010-06-22
+* Bug fix on Processor.fd_loglike_cell_rho.
+* Chi square added on spec.
+* Olsson (1979) added on references
+* Derivative version of joint estimate operational
 === 0.13.1 / 2010-06-21
 * Better specs. Bug fix on Manifest.txt

data/Manifest.txt CHANGED

@@ -6,7 +6,10 @@ data/tetmat_matrix.txt
 data/tetmat_test.txt
 lib/statsample/bivariate/extension_version.rb
 lib/statsample/bivariate/polychoric.rb
+lib/statsample/bivariate/polychoric/processor.rb
 lib/statsample/bivariate/tetrachoric.rb
+spec/spec.opts
 spec/spec_helper.rb
+spec/statsample/bivariate/polychoric_processor_spec.rb
 spec/statsample/bivariate/polychoric_spec.rb
 spec/statsample/bivariate/tetrachoric_spec.rb

data/Rakefile CHANGED

@@ -2,6 +2,7 @@
 # -*- ruby -*-
 require 'rubygems'
+require 'spec/rake/spectask'
 require 'hoe'
 Hoe.plugin :git
@@ -15,4 +16,13 @@ Hoe.spec 'statsample-bivariate-extension' do
   self.developer('Claudio Bustos', 'clbustos_at_gmail.com')
 end
+desc "Run all spec with RCov"
+Spec::Rake::SpecTask.new('test_with_rcov') do |t|
+  t.spec_files = FileList['spec/**/*.rb']
+  t.rcov = true
+  t.rcov_opts = ['--exclude', 'spec']
+end
 # vim: syntax=ruby

data/lib/statsample/bivariate/extension_version.rb CHANGED

@@ -1,6 +1,6 @@
 module Statsample
   module Bivariate
-    EXTENSION_VERSION="0.13.2"
+    EXTENSION_VERSION="0.13.3"
   end
 end

data/lib/statsample/bivariate/polychoric.rb CHANGED

@@ -1,4 +1,5 @@
 require 'minimization'
+require 'statsample/bivariate/polychoric/processor'
 module Statsample
   module Bivariate
     # Calculate Polychoric correlation for two vectors.
@@ -44,11 +45,11 @@ module Statsample
     # the polychoric correlation:
     #
     # 1. Maximum Likehood Estimator
-    # 2. Two-step estimator and
+    # 2. Two-step estimator
     # 3. Polychoric series estimate.
     #
     # By default, two-step estimation are used. You can select
-    # the estimation method with method attribute. Joint estimate and polychoric series requires gsl library and rb-gsl.
+    # the estimation method with method attribute. Joint estimate and polychoric series requires gsl library and rb-gsl. Joint estimate uses Olsson(1979) derivatives and two-step uses a derivative free method.
     #
     # == Use
     #
@@ -71,69 +72,13 @@ module Statsample
     #
     # == References
     #
-    # * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February, 11, 2010
     # * Drasgow F. (2006). Polychoric and polyserial correlations. In Kotz L, Johnson NL (Eds.), Encyclopedia of statistical sciences. Vol. 7 (pp. 69-74). New York: Wiley.
+    # * Olsson, U. (1979) Maximum likelihood estimation of the polychoric correlation coefficient. Psychometrika 44, 443-460.
+    # * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February, 11, 2010
     class Polychoric
       include Summarizable
-      class Processor
-        attr_reader :alpha, :beta, :rho
-        def initialize(alpha,beta,rho)
-          @alpha=alpha
-          @beta=beta
-          @nr=@alpha.size+1
-          @nc=@beta.size+1
-          @rho=rho
-          @pd=nil
-        end
-        def bipdf(i,j)
-           Distribution::NormalBivariate.pdf(a(i), b(j), rho)
-        end
-        def a(i)
-          i < 0 ? -100 : (i==@nr-1 ? 100 : alpha[i])
-        end
-        def b(j)
-          j < 0 ? -100 : (j==@nc-1 ? 100 : beta[j])
-        end
-        # Equation(10) from Olsson(1979)
-        def fd_loglike_cell_a(i,j,k)
-          if k==i
-            Distribution::NormalBivariate.pd_cdf_x(a(k),b(j), rho) - Distribution::NormalBivariate.pd_cdf_x(a(k),b(j-1),rho)
-          elsif k==(i-1)
-            -Distribution::NormalBivariate.pd_cdf_x(a(k),b(j),rho) + Distribution::NormalBivariate.pd_cdf_x(a(k),b(j-1),rho)
-          else
-            0
-          end
-        end
-        # phi_ij for each i and j
-        # Uses equation(4) from Olsson(1979)
-        def pd
-          if @pd.nil?
-            @pd=@nr.times.collect{ [0] * @nc}
-            pc=@nr.times.collect{ [0] * @nc}
-            @nr.times do |i|
-            @nc.times do |j|
-              if i==@nr-1 and j==@nc-1
-                @pd[i][j]=1.0
-              else
-                a=(i==@nr-1) ? 100: alpha[i]
-                b=(j==@nc-1) ? 100: beta[j]
-                #puts "a:#{a} b:#{b}"
-                @pd[i][j]=Distribution::NormalBivariate.cdf(a, b, rho)
-              end
-              pc[i][j] = @pd[i][j]
-              @pd[i][j] = @pd[i][j] - pc[i-1][j] if i>0
-              @pd[i][j] = @pd[i][j] - pc[i][j-1] if j>0
-              @pd[i][j] = @pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
-            end
-            end
-          end
-          @pd
-        end
-      end
       include DirtyMemoize
       # Name of the analysis
       attr_accessor :name
@@ -258,133 +203,8 @@ module Statsample
       def chi_square_df
         (@nr*@nc)-@nc-@nr
       end
-      # Retrieve all cell probabilities for givens alpha, beta and rho
-      def cell_probabilities(alpha,beta,rho)
-        pd=@nr.times.collect{ [0] * @nc}
-        pc=@nr.times.collect{ [0] * @nc}
-        @nr.times do |i|
-          @nc.times do |j|
-            if i==@nr-1 and j==@nc-1
-              pd[i][j]=1.0
-            else
-              a=(i==@nr-1) ? 100: alpha[i]
-              b=(j==@nc-1) ? 100: beta[j]
-              #puts "a:#{a} b:#{b}"
-              pd[i][j]=Distribution::NormalBivariate.cdf(a, b, rho)
-            end
-            pc[i][j] = pd[i][j]
-            pd[i][j] = pd[i][j] - pc[i-1][j] if i>0
-            pd[i][j] = pd[i][j] - pc[i][j-1] if j>0
-            pd[i][j] = pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
-          end
-        end
-        @pd=pd
-        pd
-      end
-      def loglike(alpha,beta,rho)
-        if rho.abs>0.9999
-          rho= (rho>0) ? 0.9999 : -0.9999
-        end
-        pr=Processor.new(alpha,beta,rho)
-        loglike=0
-        @nr.times do |i|
-          @nc.times do |j|
-            res=pr.pd[i][j]+EPSILON
-            loglike+= @matrix[i,j]  * Math::log( res )
-          end
-        end
-        -loglike
-      end
-      # First derivate for rho
-      # Uses equation (9) from Olsson(1979)
-      def fd_loglike_rho(alpha,beta,rho)
-        if rho.abs>0.9999
-          rho= (rho>0) ? 0.9999 : -0.9999
-        end
-        total=0
-        pr=Processor.new(alpha,beta,rho)
-        @nr.times do |i|
-          @nc.times do |j|
-            pi=pr.pd[i][j] + EPSILON
-            total+= (@matrix[i,j] / pi)  * (pr.bipdf(i,j)-pr.bipdf(i-1,j)-pr.bipdf(i,j-1)+pr.bipdf(i-1,j-1))
-          end
-        end
-        total
-      end
-      # First derivative for alpha_k
-      def fd_loglike_a(alpha,beta,rho,k)
-        fd_loglike_a_eq6(alpha,beta,rho,k)
-      end
-      # Uses equation (6) from Olsson(1979)
-      def fd_loglike_a_eq6(alpha,beta,rho,k)
-        if rho.abs>0.9999
-          rho= (rho>0) ? 0.9999 : -0.9999
-        end
-        pr=Processor.new(alpha,beta,rho)
-        total=0
-        pd=pr.pd
-        @nr.times do |i|
-          @nc.times  do |j|
-            total+=@matrix[i,j].quo(pd[i][j]+EPSILON) * pr.fd_loglike_cell_a(i,j,k)
-          end
-        end
-        total
-      end
-      # Uses equation(13) from Olsson(1979)
-      def fd_loglike_a_eq13(alpha,beta,rho,k)
-        if rho.abs>0.9999
-          rho= (rho>0) ? 0.9999 : -0.9999
-        end
-        pr=Processor.new(alpha,beta,rho)
-        total=0
-        a_k=pr.a(k)
-        pd=pr.pd
-        @nc.times do |j|
-          #puts "j: #{j}"
-          #puts "b #{j} : #{b.call(j)}"
-          #puts "b #{j-1} : #{b.call(j-1)}"
-          e_1=@matrix[k,j].quo(pd[k][j]+EPSILON) - @matrix[k+1,j].quo(pd[k+1][j]+EPSILON)
-          e_2=Distribution::Normal.pdf(a_k)
-          e_3=Distribution::Normal.cdf((pr.b(j)-rho*a_k).quo(Math::sqrt(1-rho**2))) - Distribution::Normal.cdf((pr.b(j-1)-rho*a_k).quo(Math::sqrt(1-rho**2)))
-          #puts "val #{j}: #{e_1} | #{e_2} | #{e_3}"
-          total+= e_1*e_2*e_3
-        end
-        total
-      end
-      # First derivative for beta_m
-      # Uses equation(14) from Olsson(1979)
-      def fd_loglike_b(alpha,beta,rho,m)
-        if rho.abs>0.9999
-          rho= (rho>0) ? 0.9999 : -0.9999
-        end
-        pr=Processor.new(alpha,beta,rho)
-        total=0
-        b_m=pr.b m
-        pd=pr.pd
-        @nr.times do |i|
-          #puts "j: #{j}"
-          #puts "b #{j} : #{b.call(j)}"
-          #puts "b #{j-1} : #{b.call(j-1)}"
-          e_1=@matrix[i,m].quo(pd[i][m]+EPSILON) - @matrix[i,m+1].quo(pd[i][m+1]+EPSILON)
-          e_2=Distribution::Normal.pdf(b_m)
-          e_3=Distribution::Normal.cdf((pr.a(i)-rho*b_m).quo(Math::sqrt(1-rho**2))) - Distribution::Normal.cdf((pr.a(i-1)-rho*b_m).quo(Math::sqrt(1-rho**2)))
-          #puts "val #{j}: #{e_1} | #{e_2} | #{e_3}"
-          total+= e_1*e_2*e_3
-        end
-        total
-      end
       def compute_basic_parameters
@@ -442,7 +262,8 @@ module Statsample
       def compute_two_step_mle_drasgow_ruby #:nodoc:
         f=proc {|rho|
-          loglike(@alpha,@beta, rho)
+          pr=Processor.new(@alpha,@beta, rho, @matrix)
+          pr.loglike
         }
         @log=_("Minimizing using GSL Brent method\n")
         min=Minimization::Brent.new(-0.9999,0.9999,f)
@@ -459,8 +280,9 @@ module Statsample
       def compute_two_step_mle_drasgow_gsl #:nodoc:
-      fn1=GSL::Function.alloc {|rho|
-        loglike(@alpha,@beta, rho)
+      fn1=GSL::Function.alloc {|rho|
+        pr=Processor.new(@alpha,@beta, rho, @matrix)
+        pr.loglike
       }
       @iteration = 0
       max_iter = @max_iterations
@@ -498,12 +320,101 @@ module Statsample
       @loglike_model=-gmf.f_minimum
       end
+      def compute_derivatives_vector(v,df)
+        new_rho=v[0]
+        new_alpha=v[1, @nr-1]
+        new_beta=v[@nr, @nc-1]
+        if new_rho.abs>0.9999
+          new_rho= (new_rho>0) ? 0.9999 : -0.9999
+        end
+        pr=Processor.new(new_alpha,new_beta,new_rho,@matrix)
+        df[0]=-pr.fd_loglike_rho
+        new_alpha.to_a.each_with_index {|v,i|
+          df[i+1]=-pr.fd_loglike_a(i)
+        }
+        offset=new_alpha.size+1
+        new_beta.to_a.each_with_index {|v,i|
+          df[offset+i]=-pr.fd_loglike_b(i)
+        }
+      end
+      def compute_one_step_mle
+        compute_one_step_mle_with_derivatives
+      end
+      def compute_one_step_mle_with_derivatives
+        # Get initial values with two-step aproach
+        compute_two_step_mle_drasgow
+        # Start iteration with past values
+        rho=@r
+        cut_alpha=@alpha
+        cut_beta=@beta
+        parameters=[rho]+cut_alpha+cut_beta
+        np=@nc-1+@nr
+        loglike_f = Proc.new { |v, params|
+          new_rho=v[0]
+          new_alpha=v[1, @nr-1]
+          new_beta=v[@nr, @nc-1]
+          pr=Processor.new(new_alpha,new_beta,new_rho,@matrix)
+          pr.loglike
+        }
+        loglike_df = Proc.new {|v, params, df |
+          compute_derivatives_vector(v,df)
+        }
+        my_func = GSL::MultiMin::Function_fdf.alloc(loglike_f,loglike_df, np)
+        my_func.set_params(parameters)      # parameters
+        x = GSL::Vector.alloc(parameters.dup)
+        minimizer = GSL::MultiMin::FdfMinimizer.alloc('conjugate_pr',np)
+        minimizer.set(my_func, x, 1, 1e-3)
+        iter = 0
+        message=""
+        begin_time=Time.new
+        begin
+          iter += 1
+          status = minimizer.iterate()
+          #p minimizer.f
+          #p minimizer.gradient
+          status = minimizer.test_gradient(1e-3)
+          if status == GSL::SUCCESS
+            total_time=Time.new-begin_time
+            message+="Joint MLE converged to minimum on %0.3f seconds at\n" % total_time
+          end
+          x = minimizer.x
+          message+= sprintf("%5d iterations", iter)+"\n";
+          message+= "args="
+          for i in 0...np do
+            message+=sprintf("%10.3e ", x[i])
+          end
+          message+=sprintf("f() = %7.3f\n"  , minimizer.f)+"\n";
+        end while status == GSL::CONTINUE and iter < @max_iterations
+        @iteration=iter
+        @log+=message
+        @r=minimizer.x[0]
+        @alpha=minimizer.x[1,@nr-1].to_a
+        @beta=minimizer.x[@nr,@nc-1].to_a
+        @loglike_model= -minimizer.minimum
+        pr=Processor.new(@alpha,@beta,@r,@matrix)
+      end
       # Compute Polychoric correlation with joint estimate.
       # Rho and thresholds are estimated at same time.
       # Code based on R package "polycor", by J.Fox.
       #
-      def compute_one_step_mle
+      def compute_one_step_mle_without_derivatives
         # Get initial values with two-step aproach
         compute_two_step_mle_drasgow
         # Start iteration with past values
@@ -511,10 +422,12 @@ module Statsample
         cut_alpha=@alpha
         cut_beta=@beta
         parameters=[rho]+cut_alpha+cut_beta
+        np=@nc-1+@nr
         minimization = Proc.new { |v, params|
-         rho=v[0]
-         alpha=v[1, @nr-1]
-         beta=v[@nr, @nc-1]
+          new_rho=v[0]
+         new_alpha=v[1, @nr-1]
+         new_beta=v[@nr, @nc-1]
          #puts "f'rho=#{fd_loglike_rho(alpha,beta,rho)}"
          #(@nr-1).times {|k|
@@ -525,10 +438,12 @@ module Statsample
          #(@nc-1).times {|k|
          #  puts "f'b(#{k}) = #{fd_loglike_b(alpha,beta,rho,k)}"
          #}
+         pr=Processor.new(new_alpha,new_beta,new_rho,@matrix)
-         loglike(alpha,beta,rho)
+         df=Array.new(np)
+         #compute_derivatives_vector(v,df)
+         pr.loglike
         }
-        np=@nc-1+@nr
         my_func = GSL::MultiMin::Function.alloc(minimization, np)
         my_func.set_params(parameters)      # parameters
@@ -542,12 +457,14 @@ module Statsample
         iter = 0
         message=""
+        begin_time=Time.new
         begin
           iter += 1
           status = minimizer.iterate()
           status = minimizer.test_size(@epsilon)
           if status == GSL::SUCCESS
-            message="Joint MLE converged to minimum at\n"
+            total_time=Time.new-begin_time
+            message="Joint MLE converged to minimum on %0.3f seconds at\n" % total_time
           end
           x = minimizer.x
           message+= sprintf("%5d iterations", iter)+"\n";
@@ -830,8 +747,8 @@ module Statsample
         end # 43
         raise "Error" if norts==0
         @r=pcorl
-        @loglike_model=-loglike(@alpha, @beta, @r)
+        pr=Processor.new(@alpha,@beta,@r,@matrix)
+        @loglike_model=-pr.loglike
       end
       #Computes vector h(mm7) of orthogonal hermite...
@@ -878,7 +795,8 @@ module Statsample
           t.row([_("Threshold Y %d") % i, sprintf("%0.4f", val)])
         }
         section.add(t)
-        section.add(_("Test of bivariate normality: X2 = %0.3f, df = %d, p= %0.5f" % [ chi_square, chi_square_df, 1-Distribution::ChiSquare.cdf(chi_square, chi_square_df)]))
+        section.add(_("Iterations: %d") % @iteration)
+        section.add(_("Test of bivariate normality: X^2 = %0.3f, df = %d, p= %0.5f" % [ chi_square, chi_square_df, 1-Distribution::ChiSquare.cdf(chi_square, chi_square_df)]))
         generator.parse_element(section)
       end
     end

data/lib/statsample/bivariate/polychoric/processor.rb ADDED

@@ -0,0 +1,261 @@
+module Statsample
+  module Bivariate
+    class Polychoric
+      # Provides statistics for a given combination of rho, alpha and beta and contingence table.
+      class Processor
+        attr_reader :alpha, :beta, :rho, :matrix
+        EPSILON=1e-10
+        def initialize(alpha,beta,rho,matrix=nil)
+          @alpha=alpha
+          @beta=beta
+          @matrix=matrix
+          @nr=@alpha.size+1
+          @nc=@beta.size+1
+          @rho=rho
+          @pd=nil
+        end
+        def bipdf(i,j)
+           Distribution::NormalBivariate.pdf(a(i), b(j), rho)
+        end
+        def loglike
+          rho=@rho
+          if rho.abs>0.9999
+            rho= (rho>0) ? 0.9999 : -0.9999
+          end
+          loglike=0
+          @nr.times do |i|
+            @nc.times do |j|
+              res=pd[i][j]+EPSILON
+              loglike+= @matrix[i,j]  * Math::log( res )
+            end
+          end
+          -loglike
+        end
+        def a(i)
+          raise "Index #{i} should be <= #{@nr-1}" if i>@nr-1
+          i < 0 ? -100 : (i==@nr-1 ? 100 : alpha[i])
+        end
+        def b(j)
+          raise "Index #{j} should be <= #{@nc-1}" if j>@nc-1
+          j < 0 ? -100 : (j==@nc-1 ? 100 : beta[j])
+        end
+        def eq12(u,v)
+          Distribution::Normal.pdf(u)*Distribution::Normal.cdf((v-rho*u).quo( Math::sqrt(1-rho**2)))
+        end
+        def eq12b(u,v)
+          Distribution::Normal.pdf(v) * Distribution::Normal.cdf((u-rho*v).quo( Math::sqrt(1-rho**2)))
+        end
+        # Equation(8) from Olsson(1979)
+        def fd_loglike_cell_rho(i, j)
+          bipdf(i,j) - bipdf(i-1,j) - bipdf(i, j-1) + bipdf(i-1, j-1)
+        end
+        # Equation(10) from Olsson(1979)
+        def fd_loglike_cell_a(i, j, k)
+=begin
+          if k==i
+            Distribution::NormalBivariate.pd_cdf_x(a(k),b(j), rho) - Distribution::NormalBivariate.pd_cdf_x(a(k),b(j-1),rho)
+          elsif k==(i-1)
+            -Distribution::NormalBivariate.pd_cdf_x(a(k),b(j),rho) + Distribution::NormalBivariate.pd_cdf_x(a(k),b(j-1),rho)
+          else
+            0
+          end
+=end
+          if k==i
+            eq12(a(k),b(j))-eq12(a(k), b(j-1))
+          elsif k==(i-1)
+            -eq12(a(k),b(j))+eq12(a(k), b(j-1))
+          else
+            0
+          end
+        end
+        def fd_loglike_cell_b(i, j, m)
+          if m==j
+             eq12b(a(i),b(m))-eq12b(a(i-1),b(m))
+          elsif m==(j-1)
+            -eq12b(a(i),b(m))+eq12b(a(i-1),b(m))
+          else
+            0
+          end
+=begin
+          if m==j
+            Distribution::NormalBivariate.pd_cdf_x(a(i),b(m), rho) - Distribution::NormalBivariate.pd_cdf_x(a(i-1),b(m),rho)
+          elsif m==(j-1)
+            -Distribution::NormalBivariate.pd_cdf_x(a(i),b(m),rho) + Distribution::NormalBivariate.pd_cdf_x(a(i-1),b(m),rho)
+          else
+            0
+          end
+=end
+        end
+        # phi_ij for each i and j
+        # Uses equation(4) from Olsson(1979)
+        def pd
+          if @pd.nil?
+            @pd=@nr.times.collect{ [0] * @nc}
+            pc=@nr.times.collect{ [0] * @nc}
+            @nr.times do |i|
+            @nc.times do |j|
+              if i==@nr-1 and j==@nc-1
+                @pd[i][j]=1.0
+              else
+                a=(i==@nr-1) ? 100: alpha[i]
+                b=(j==@nc-1) ? 100: beta[j]
+                #puts "a:#{a} b:#{b}"
+                @pd[i][j]=Distribution::NormalBivariate.cdf(a, b, rho)
+              end
+              pc[i][j] = @pd[i][j]
+              @pd[i][j] = @pd[i][j] - pc[i-1][j] if i>0
+              @pd[i][j] = @pd[i][j] - pc[i][j-1] if j>0
+              @pd[i][j] = @pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
+            end
+            end
+          end
+          @pd
+        end
+        # First derivate for rho
+        # Uses equation (9) from Olsson(1979)
+        def fd_loglike_rho
+          rho=@rho
+          if rho.abs>0.9999
+            rho= (rho>0) ? 0.9999 : -0.9999
+          end
+          total=0
+          @nr.times do |i|
+            @nc.times do |j|
+              pi=pd[i][j] + EPSILON
+              total+= (@matrix[i,j].quo(pi))  * (bipdf(i,j)-bipdf(i-1,j)-bipdf(i,j-1)+bipdf(i-1,j-1))
+            end
+          end
+          total
+        end
+        # First derivative for alpha_k
+        # Uses equation (6)
+        def fd_loglike_a(k)
+          fd_loglike_a_eq6(k)
+        end
+        # Uses equation (6) from Olsson(1979)
+        def fd_loglike_a_eq6(k)
+          rho=@rho
+          if rho.abs>0.9999
+            rho= (rho>0) ? 0.9999 : -0.9999
+          end
+          total=0
+          @nr.times do |i|
+            @nc.times  do |j|
+              total+=@matrix[i,j].quo(pd[i][j]+EPSILON) * fd_loglike_cell_a(i,j,k)
+            end
+          end
+          total
+        end
+        # Uses equation(13) from Olsson(1979)
+        def fd_loglike_a_eq13(k)
+          rho=@rho
+          if rho.abs>0.9999
+            rho= (rho>0) ? 0.9999 : -0.9999
+          end
+          total=0
+          a_k=a(k)
+          @nc.times do |j|
+            #puts "j: #{j}"
+            #puts "b #{j} : #{b.call(j)}"
+            #puts "b #{j-1} : #{b.call(j-1)}"
+            e_1=@matrix[k,j].quo(pd[k][j]+EPSILON) - @matrix[k+1,j].quo(pd[k+1][j]+EPSILON)
+            e_2=Distribution::Normal.pdf(a_k)
+            e_3=Distribution::Normal.cdf((b(j)-rho*a_k).quo(Math::sqrt(1-rho**2))) - Distribution::Normal.cdf((b(j-1)-rho*a_k).quo(Math::sqrt(1-rho**2)))
+            #puts "val #{j}: #{e_1} | #{e_2} | #{e_3}"
+            total+= e_1*e_2*e_3
+          end
+          total
+        end
+        # First derivative for b
+        # Uses equation 6 (Olsson, 1979)
+        def fd_loglike_b_eq6(m)
+          rho=@rho
+          if rho.abs>0.9999
+            rho= (rho>0) ? 0.9999 : -0.9999
+          end
+          total=0
+          @nr.times do |i|
+            @nc.times  do |j|
+              total+=@matrix[i,j].quo(pd[i][j]+EPSILON) * fd_loglike_cell_b(i,j,m)
+            end
+          end
+          total
+        end
+        # First derivative for beta_m.
+        # Uses equation 6 (Olsson,1979)
+        def fd_loglike_b(m)
+          fd_loglike_b_eq14(m)
+        end
+        # First derivative for beta_m
+        # Uses equation(14) from Olsson(1979)
+        def fd_loglike_b_eq14(m)
+          rho=@rho
+          if rho.abs>0.9999
+            rho= (rho>0) ? 0.9999 : -0.9999
+          end
+          total=0
+          b_m=b(m)
+          @nr.times do |i|
+            e_1=@matrix[i,m].quo(pd[i][m]+EPSILON) - @matrix[i,m+1].quo(pd[i][m+1]+EPSILON)
+            e_2=Distribution::Normal.pdf(b_m)
+            e_3=Distribution::Normal.cdf((a(i)-rho*b_m).quo(Math::sqrt(1-rho**2))) - Distribution::Normal.cdf((a(i-1)-rho*b_m).quo(Math::sqrt(1-rho**2)))
+            #puts "val #{j}: #{e_1} | #{e_2} | #{e_3}"
+            total+= e_1*e_2*e_3
+          end
+          total
+        end
+        # Returns the derivative correct according to order
+        def im_function(t,i,j)
+          if t==0
+            fd_loglike_cell_rho(i,j)
+          elsif t>=1 and t<=@alpha.size
+            fd_loglike_cell_a(i,j,t-1)
+          elsif t>=@alpha.size+1 and t<=(@alpha.size+@beta.size)
+            fd_loglike_cell_b(i,j,t-@alpha.size-1)
+          else
+            raise "incorrect #{t}"
+          end
+        end
+        def information_matrix
+          total_n=@matrix.total_sum
+          vars=@alpha.size+@beta.size+1
+          matrix=vars.times.map { vars.times.map {0}}
+          vars.times do |m|
+            vars.times do |n|
+              total=0
+              (@nr-1).times do |i|
+                (@nc-1).times do |j|
+                  total+=(1.quo(pd[i][j]+EPSILON)) * im_function(m,i,j) * im_function(n,i,j)
+                end
+              end
+              matrix[m][n]=total_n*total
+            end
+          end
+          m=::Matrix.rows(matrix)
+        end
+      end # Processor
+    end # Polychoric
+  end # Bivariate
+end # Statsample

data/spec/spec.opts ADDED

@@ -0,0 +1,3 @@
+--color
+-f s
+-b

data/spec/statsample/bivariate/polychoric_processor_spec.rb ADDED

@@ -0,0 +1,47 @@
+$:.unshift(File.dirname(__FILE__)+"/../../")
+require 'spec_helper'
+describe Statsample::Bivariate::Polychoric::Processor do
+  before do
+     @matrix=Matrix[[58,52,1],[26,58,3],[8,12,9]]
+     @alpha=[-0.027, 1.137]
+     @beta=[-0.240, 1.1578]
+     @rho=0.420
+     @processor=Statsample::Bivariate::Polychoric::Processor.new(@alpha,@beta,@rho,@matrix)
+  end
+  it "im_function  method should return correct values according to index" do
+    @processor.im_function(0,0,0).should==@processor.fd_loglike_cell_rho(0,0)
+    @processor.im_function(1,0,0).should==@processor.fd_loglike_cell_a(0,0,0)
+    @processor.im_function(2,0,0).should==@processor.fd_loglike_cell_a(0,0,1)
+    @processor.im_function(3,1,0).should==@processor.fd_loglike_cell_b(1,0,0)
+    @processor.im_function(4,0,1).should==@processor.fd_loglike_cell_b(0,1,1)
+    lambda {@processor.im_function(5)}.should raise_error
+  end
+  it "should return informacion matrix" do
+    p @processor.information_matrix.inverse
+  end
+  it "fd a loglike should be equal usign eq.6 and eq.13" do
+    2.times {|k|
+      @processor.fd_loglike_a_eq6(k).should be_close @processor.fd_loglike_a_eq13(k), 1e-10
+    }
+  end
+  it "fd b loglike should be equal usign eq.6 and eq.14" do
+    2.times {|m|
+      @processor.fd_loglike_b_eq6(m).should be_close @processor.fd_loglike_b_eq14(m), 1e-10
+    }
+  end
+  it "fd rho should be equal usign fd_loglike_cell_rho and fd_loglike_rho" do
+    total=0
+    nr=@alpha.size+1
+    nc=@beta.size+1
+    nr.times do |i|
+      nc.times do |j|
+        pi=@processor.pd[i][j] + 1e-10
+        total+= (@matrix[i,j].quo(pi))  * @processor.fd_loglike_cell_rho(i,j)
+      end
+    end
+    total.should==@processor.fd_loglike_rho
+  end
+end

data/spec/statsample/bivariate/polychoric_spec.rb CHANGED

@@ -14,13 +14,34 @@ describe "Statsample::Bivariate polychoric extension" do
   end
 describe Statsample::Bivariate::Polychoric do
   before do
     matrix=Matrix[[58,52,1],[26,58,3],[8,12,9]]
     @poly=Statsample::Bivariate::Polychoric.new(matrix)
   end
   it "should have summary.size > 0" do
+    @poly.method=:two_step
     @poly.summary.size.should>0
   end
+  def check_joint
+    @poly.r.should be_close(0.4192, 0.0001)
+    @poly.threshold_x[0].should be_close(-0.0297, 0.0001)
+    @poly.threshold_x[1].should be_close(1.1331, 0.0001)
+    @poly.threshold_y[0].should be_close(-0.2421, 0.0001)
+    @poly.threshold_y[1].should be_close(1.5938 ,0.0001)
+    @poly.chi_square.should be_close(11.54,0.01)
+  end
+  it "compute joint estimation (without derivative) using gsl" do
+    @poly.compute_one_step_mle_without_derivatives
+    check_joint
+  end
+  it "compute joint estimation (with derivative) using gsl" do
+    @poly.compute_one_step_mle_with_derivatives
+    check_joint
+  end
   def check_two_step
     @poly.r.should be_close(0.420, 0.001)
     @poly.threshold_y[0].should be_close(-0.240 ,0.001)
@@ -32,37 +53,24 @@ describe Statsample::Bivariate::Polychoric do
       @poly.compute_two_step_mle_drasgow_ruby
       check_two_step
   end
-  if Statsample.has_gsl?
-    it "compute two-step with gsl" do
-      @poly.compute_two_step_mle_drasgow_gsl
-      check_two_step
-    end
-    it "compute polychoric series using gsl" do
-      @poly.method=:polychoric_series
-      @poly.compute
-      @poly.r.should be_close(0.556, 0.001)
-      @poly.threshold_y[0].should be_close(-0.240 ,0.001)
-      @poly.threshold_x[0].should be_close(-0.027 ,0.001)
-      @poly.threshold_y[1].should be_close(1.578  ,0.001)
-      @poly.threshold_x[1].should be_close(1.137  ,0.001)
-    end
-    it "compute joint estimation (without derivative) using gsl" do
-      @poly.method=:joint
-      @poly.compute
-      @poly.method.should==:joint
-      @poly.r.should be_close(0.4192, 0.0001)
-      @poly.threshold_y[0].should be_close(-0.2421, 0.0001)
-      @poly.threshold_x[0].should be_close(-0.0297, 0.0001)
-      @poly.threshold_y[1].should be_close(1.5938 ,0.0001)
-      @poly.threshold_x[1].should be_close(1.1331, 0.0001)
-    end
-  else
-    it "compute two-step with gsl requires rb-gsl"
-    it "compute polychoric series requires rb-gsl"
-    it "compute joint estimation requires rb-gsl"
+  it "compute two-step with gsl" do
+    pending("requires rb-gsl") unless Statsample.has_gsl?
+    @poly.compute_two_step_mle_drasgow_gsl
+    check_two_step
   end
+  it "compute polychoric series using gsl" do
+    pending("requires rb-gsl") unless Statsample.has_gsl?
+    @poly.method=:polychoric_series
+    @poly.compute
+    @poly.r.should be_close(0.556, 0.001)
+    @poly.threshold_y[0].should be_close(-0.240 ,0.001)
+    @poly.threshold_x[0].should be_close(-0.027 ,0.001)
+    @poly.threshold_y[1].should be_close(1.578  ,0.001)
+    @poly.threshold_x[1].should be_close(1.137  ,0.001)
+  end
 end

metadata CHANGED

@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
   segments:
   - 0
   - 13
-  - 2
-  version: 0.13.2
+  - 3
+  version: 0.13.3
 platform: ruby
 authors:
 - Claudio Bustos
@@ -35,7 +35,7 @@ cert_chain:
   rpP0jjs0
   -----END CERTIFICATE-----
-date: 2010-06-21 00:00:00 -04:00
+date: 2010-06-22 00:00:00 -04:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -89,8 +89,11 @@ files:
 - data/tetmat_test.txt
 - lib/statsample/bivariate/extension_version.rb
 - lib/statsample/bivariate/polychoric.rb
+- lib/statsample/bivariate/polychoric/processor.rb
 - lib/statsample/bivariate/tetrachoric.rb
+- spec/spec.opts
 - spec/spec_helper.rb
+- spec/statsample/bivariate/polychoric_processor_spec.rb
 - spec/statsample/bivariate/polychoric_spec.rb
 - spec/statsample/bivariate/tetrachoric_spec.rb
 has_rdoc: true

metadata.gz.sig CHANGED

Binary file