statsample 0.5.0 → 0.5.1
This diff shows the changes between publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
- data/History.txt +11 -0
- data/Manifest.txt +7 -0
- data/README.txt +3 -3
- data/data/repeated_fields.csv +7 -0
- data/data/tetmat_matrix.txt +5 -0
- data/data/tetmat_test.txt +1001 -0
- data/demo/spss_matrix.rb +3 -0
- data/lib/spss.rb +1 -1
- data/lib/statistics2.rb +1 -1
- data/lib/statsample.rb +30 -1
- data/lib/statsample/anova.rb +62 -66
- data/lib/statsample/bivariate.rb +273 -281
- data/lib/statsample/bivariate/tetrachoric.rb +418 -0
- data/lib/statsample/codification.rb +15 -15
- data/lib/statsample/combination.rb +108 -106
- data/lib/statsample/converter/csv18.rb +52 -52
- data/lib/statsample/converter/csv19.rb +45 -48
- data/lib/statsample/converter/spss.rb +47 -0
- data/lib/statsample/converters.rb +74 -77
- data/lib/statsample/crosstab.rb +21 -17
- data/lib/statsample/dataset.rb +595 -543
- data/lib/statsample/dominanceanalysis.rb +7 -10
- data/lib/statsample/htmlreport.rb +23 -0
- data/lib/statsample/regression/multiple/baseengine.rb +59 -59
- data/lib/statsample/regression/multiple/gslengine.rb +1 -1
- data/lib/statsample/reliability.rb +165 -145
- data/lib/statsample/vector.rb +16 -2
- data/test/test_anova.rb +16 -16
- data/test/test_bivariate.rb +146 -0
- data/test/test_csv.rb +6 -0
- data/test/test_dataset.rb +49 -5
- data/test/test_statistics.rb +6 -90
- data/test/test_vector.rb +27 -10
- metadata +10 -4
- data/test/test_r.rb +0 -9
- data/test/test_stata.rb +0 -11
data/demo/spss_matrix.rb
ADDED
data/lib/spss.rb
CHANGED
data/lib/statistics2.rb
CHANGED
data/lib/statsample.rb
CHANGED
@@ -38,6 +38,34 @@ class String
   end
 end
 
+
+class Array
+  # Recode repeated values on an array, adding the number of repetition
+  # at the end
+  # Example:
+  #   a=%w{a b c c d d d e}
+  #   a.recode_repeated
+  #   => ["a","b","c_1","c_2","d_1","d_2","d_3","e"]
+  def recode_repeated
+    if self.size!=self.uniq.size
+      # Find repeated
+      repeated=self.inject({}) {|a,v|
+        (a[v].nil? ? a[v]=1 : a[v]+=1); a }.find_all{|k,v| v>1}.collect{|k,v| k}
+      ns=repeated.inject({}) {|a,v| a[v]=0;a}
+      self.collect do |f|
+        if repeated.include? f
+          ns[f]+=1
+          sprintf("%s_%d",f,ns[f])
+        else
+          f
+        end
+      end
+    else
+      self
+    end
+  end
+end
+
 def create_test(*args,&proc)
   description=args.shift
   fields=args
@@ -80,7 +108,7 @@ end
 # * Dataset: An union of vectors.
 #
 module Statsample
-  VERSION = '0.5.0'
+  VERSION = '0.5.1'
   SPLIT_TOKEN = ","
   autoload(:Database, 'statsample/converters')
   autoload(:Anova, 'statsample/anova')
@@ -89,6 +117,7 @@ module Statsample
   autoload(:PlainText, 'statsample/converters')
   autoload(:Excel, 'statsample/converters')
   autoload(:GGobi, 'statsample/converters')
+  autoload(:SPSS, 'statsample/converter/spss')
   autoload(:DominanceAnalysis, 'statsample/dominanceanalysis')
   autoload(:HtmlReport, 'statsample/htmlreport')
   autoload(:Mx, 'statsample/converters')
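The most visible addition to statsample.rb is the Array#recode_repeated helper. A minimal usage sketch in Ruby, taken directly from the method's RDoc example above (the output shown is the one documented there, not re-run here):

  require 'statsample'

  a = %w{a b c c d d d e}
  a.recode_repeated
  # => ["a", "b", "c_1", "c_2", "d_1", "d_2", "d_3", "e"]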
data/lib/statsample/anova.rb
CHANGED
@@ -1,70 +1,66 @@
 module Statsample
+  module Anova
+    # One Way Anova
+    # Example:
+    #   v1=[2,3,4,5,6].to_scale
+    #   v2=[3,3,4,5,6].to_scale
+    #   v3=[5,3,1,5,6].to_scale
+    #   anova=Statsample::Anova::OneWay.new([v1,v2,v3])
+    #   puts anova.f
+    #   puts anova.significance
+    class OneWay
+      def initialize(vectors)
+        @vectors=vectors
+      end
+      # Total sum
+      def sum
+        @vectors.inject(0){|a,v| a+v.sum}
+      end
+      # Total mean
+      def mean
+        sum.quo(n)
+      end
+      # Total sum of squares
+      def sst
+        m=mean.to_f
+        @vectors.inject(0) {|total,vector| total+vector.sum_of_squares(m) }
+      end
+      # Sum of squares within groups
+      def sswg
+        @vectors.inject(0) {|total,vector| total+vector.sum_of_squares }
+      end
+      # Sum of squares between groups
+      def ssbg
+        m=mean
+        @vectors.inject(0) do |total,vector|
+          total + (vector.mean-m).square * vector.size
         end
-      }
-    end
-    # Degrees of freedom within groups
-    def df_wg
-      @vectors.inject(0) {|a,v| a+(v.size-1)}
-    end
-    # Degrees of freedom between groups
-    def df_bg
-      @vectors.size-1
-    end
-    # Total Degrees of freedom
-    def df_total
-      n-1
-    end
-    # Total number of cases
-    def n
-      @vectors.inject(0){|a,v| a+v.size}
-    end
-    # Fisher
-    def f
-      k=@vectors.size
-      (ssbg*(n-k)) / (sswg*(k-1))
-    end
-    # Significance of Fisher
-    def significance
-      1.0-Distribution::F.cdf(f,df_bg,df_wg)
-    end
-  end
+      end
+      # Degrees of freedom within groups
+      def df_wg
+        @vectors.inject(0) {|a,v| a+(v.size-1)}
+      end
+      # Degrees of freedom between groups
+      def df_bg
+        @vectors.size-1
+      end
+      # Total Degrees of freedom
+      def df_total
+        n-1
+      end
+      # Total number of cases
+      def n
+        @vectors.inject(0){|a,v| a+v.size}
+      end
+      # Fisher
+      def f
+        k=@vectors.size
+        (ssbg*(n-k)) / (sswg*(k-1))
+      end
+      # Significance of Fisher
+      def significance
+        1.0-Distribution::F.cdf(f,df_bg,df_wg)
+      end
     end
+  end
 end
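The anova.rb change is essentially a reformatting of Statsample::Anova::OneWay; the documented API is unchanged. A short usage sketch assembled from the RDoc example in the hunk above (the printed values are not reproduced here):

  require 'statsample'

  v1 = [2,3,4,5,6].to_scale
  v2 = [3,3,4,5,6].to_scale
  v3 = [5,3,1,5,6].to_scale

  anova = Statsample::Anova::OneWay.new([v1, v2, v3])
  puts anova.f             # Fisher's F: (ssbg*(n-k)) / (sswg*(k-1))
  puts anova.significance  # 1.0 - Distribution::F.cdf(f, df_bg, df_wg)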
data/lib/statsample/bivariate.rb
CHANGED
@@ -1,286 +1,278 @@
+require 'statsample/bivariate/tetrachoric'
 module Statsample
-    def correlation_probability_matrix(ds, tails=:both)
-      rows=ds.fields.collect{|row|
-        ds.fields.collect{|col|
-          v1a,v2a=Statsample.only_valid(ds[row],ds[col])
-          (row==col or ds[row].type!=:scale or ds[col].type!=:scale) ? nil : prop_pearson(t_pearson(ds[row],ds[col]), v1a.size, tails)
-        }
-      }
-      Matrix.rows(rows)
-    end
-    # Spearman ranked correlation coefficient between 2 vectors
-    def spearman(v1,v2)
-      v1a,v2a=Statsample.only_valid(v1,v2)
-      v1r,v2r=v1a.ranked(:scale),v2a.ranked(:scale)
-      pearson(v1r,v2r)
-    end
-    # Calculate Point biserial correlation.
-    # Equal to Pearson correlation, with one dichotomous value replaced
-    # by "0" and the other by "1"
-    def point_biserial(dichotomous,continous)
-      ds={'d'=>dichotomous,'c'=>continous}.to_dataset.dup_only_valid
-      raise(TypeError, "First vector should be dichotomous") if ds['d'].factors.size!=2
-      raise(TypeError, "Second vector should be continous") if ds['c'].type!=:scale
-      f0=ds['d'].factors.sort[0]
-      m0=ds.filter_field('c') {|c| c['d']==f0}
-      m1=ds.filter_field('c') {|c| c['d']!=f0}
-      ((m1.mean-m0.mean).to_f / ds['c'].sdp) * Math::sqrt(m0.size*m1.size.to_f / ds.cases**2)
-    end
-    # Kendall Rank Correlation Coefficient.
-    #
-    # Based on Hervé Adbi article
-    def tau_a(v1,v2)
-      v1a,v2a=Statsample.only_valid(v1,v2)
-      n=v1.size
-      v1r,v2r=v1a.ranked(:scale),v2a.ranked(:scale)
-      o1=ordered_pairs(v1r)
-      o2=ordered_pairs(v2r)
-      delta= o1.size*2-(o2 & o1).size*2
-      1-(delta * 2 / (n*(n-1)).to_f)
-    end
-    # Calculates Tau b correlation.
-    #
-    # Tau-b defines perfect association as strict monotonicity.
-    # Although it requires strict monotonicity to reach 1.0,
-    # it does not penalize ties as much as some other measures.
-    #
-    # Source: http://faculty.chass.ncsu.edu/garson/PA765/assocordinal.htm
-    def tau_b(matrix)
-      v=pairs(matrix)
-      ((v['P']-v['Q']).to_f / Math::sqrt((v['P']+v['Q']+v['Y'])*(v['P']+v['Q']+v['X'])).to_f)
-    end
-    # Calculates Goodman and Kruskal's gamma.
-    #
-    # Gamma is the surplus of concordant pairs over discordant pairs,
-    # as a percentage of all pairs ignoring ties.
-    #
-    # Source: http://faculty.chass.ncsu.edu/garson/PA765/assocordinal.htm
-    def gamma(matrix)
-      v=pairs(matrix)
-      (v['P']-v['Q']).to_f / (v['P']+v['Q']).to_f
-    end
-    # Calculate indexes for a matrix
-    # the rows and cols has to be ordered
-    def pairs(matrix)
-      # calculate concordant
-      #p matrix
-      rs=matrix.row_size
-      cs=matrix.column_size
-      conc=disc=ties_x=ties_y=0
-      (0...(rs-1)).each {|x|
-        (0...(cs-1)).each{|y|
-          ((x+1)...rs).each{|x2|
-            ((y+1)...cs).each{|y2|
-              #p sprintf("%d:%d,%d:%d",x,y,x2,y2)
-              conc+=matrix[x,y]*matrix[x2,y2]
-            }
-          }
-        }
-      }
-      (0...(rs-1)).each {|x|
-        (1...(cs)).each{|y|
-          ((x+1)...rs).each{|x2|
-            (0...y).each{|y2|
-              #p sprintf("%d:%d,%d:%d",x,y,x2,y2)
-              disc+=matrix[x,y]*matrix[x2,y2]
-            }
-          }
-        }
-      }
-      (0...(rs-1)).each {|x|
-        (0...(cs)).each{|y|
-          ((x+1)...(rs)).each{|x2|
-            ties_x+=matrix[x,y]*matrix[x2,y]
-          }
-        }
-      }
-      (0...rs).each {|x|
-        (0...(cs-1)).each{|y|
-          ((y+1)...(cs)).each{|y2|
-            ties_y+=matrix[x,y]*matrix[x,y2]
-          }
-        }
-      }
-      {'P'=>conc,'Q'=>disc,'Y'=>ties_y,'X'=>ties_x}
-    end
-    def ordered_pairs(vector)
-      d=vector.data
-      a=[]
-      (0...(d.size-1)).each{|i|
-        ((i+1)...(d.size)).each {|j|
-          a.push([d[i],d[j]])
-        }
-      }
-      a
-    end
-    def sum_of_codeviated(v1,v2)
-      v1a,v2a=Statsample.only_valid(v1,v2)
-      sum=0
-      (0...v1a.size).each{|i|
-        sum+=v1a[i]*v2a[i]
-      }
-      sum-((v1a.sum*v2a.sum) / v1a.size.to_f)
-    end
+  # Diverse correlation methods
+  module Bivariate
+    class << self
+      # Covariance between two vectors
+      def covariance(v1,v2)
+        v1a,v2a=Statsample.only_valid(v1,v2)
+        return nil if v1a.size==0
+        if HAS_GSL
+          GSL::Stats::covariance(v1a.gsl, v2a.gsl)
+        else
+          covariance_slow(v1a,v2a)
+        end
+      end
+      def maximum_likehood_dichotomic(pred,real)
+        preda,reala=Statsample.only_valid(pred,real)
+        sum=0
+        pred.each_index{|i|
+          sum+=(real[i]*Math::log(pred[i])) + ((1-real[i])*Math::log(1-pred[i]))
+        }
+        sum
+      end
+
+      def covariance_slow(v1a,v2a) # :nodoc:
+        t=0
+        m1=v1a.mean
+        m2=v1a.mean
+        (0...v1a.size).each {|i| t+=((v1a[i]-m1)*(v2a[i]-m2)) }
+        t.to_f / (v1a.size-1)
+      end
+      # Calculate Pearson correlation coefficient between 2 vectors
+      def pearson(v1,v2)
+        v1a,v2a=Statsample.only_valid(v1,v2)
+        return nil if v1a.size ==0
+        if HAS_GSL
+          GSL::Stats::correlation(v1a.gsl, v2a.gsl)
+        else
+          pearson_slow(v1a,v2a)
+        end
+      end
+      def pearson_slow(v1a,v2a) # :nodoc:
+        v1s,v2s=v1a.vector_standarized_pop,v2a.vector_standarized_pop
+        t=0
+        siz=v1s.size
+        (0...v1s.size).each {|i| t+=(v1s[i]*v2s[i]) }
+        t.to_f/v2s.size
+      end
+      # Retrieves the value for t test for a pearson correlation
+      # between two vectors to test the null hipothesis of r=0
+      def t_pearson(v1,v2)
+        v1a,v2a=Statsample.only_valid(v1,v2)
+        r=pearson(v1a,v2a)
+        if(r==1.0)
+          0
+        else
+          t_r(r,v1a.size)
+        end
+      end
+      # Retrieves the value for t test for a pearson correlation
+      # giving r and vector size
+      def t_r(r,size)
+        r * Math::sqrt(((size)-2).to_f / (1 - r**2))
+      end
+      # Retrieves the probability value (a la SPSS)
+      # for a given t, size and number of tails.
+      # Uses a second parameter
+      # * :both or 2 : for r!=0
+      # * :right, :positive or 1 : for r > 0
+      # * :left, :negative : for r < 0
+
+      def prop_pearson(t, size, tails=:both)
+        tails=:both if tails==2
+        tails=:right if tails==1 or tails==:positive
+        tails=:left if tails==:negative
+
+        n_tails=case tails
+          when :both then 2
+          else 1
+        end
+        t=-t if t>0 and (tails==:both)
+        cdf=Distribution::T.cdf(t, size-2)
+        if(tails==:right)
+          1.0-(cdf*n_tails)
+        else
+          cdf*n_tails
+        end
+      end
+      # Returns residual score after delete variance
+      # from another variable
+      #
+      def residuals(from,del)
+        r=Statsample::Bivariate.pearson(from,del)
+        froms, dels = from.vector_standarized, del.vector_standarized
+        nv=[]
+        froms.data_with_nils.each_index do |i|
+          if froms[i].nil? or dels[i].nil?
+            nv.push(nil)
+          else
+            nv.push(froms[i]-r*dels[i])
+          end
+        end
+        nv.to_vector(:scale)
+      end
+      # Correlation between v1 and v2, controling the effect of
+      # control on both.
+      def partial_correlation(v1,v2,control)
+        v1a,v2a,cona=Statsample.only_valid(v1,v2,control)
+        rv1v2=pearson(v1a,v2a)
+        rv1con=pearson(v1a,cona)
+        rv2con=pearson(v2a,cona)
+        (rv1v2-(rv1con*rv2con)).quo(Math::sqrt(1-rv1con**2) * Math::sqrt(1-rv2con**2))
+
+      end
+      # Covariance matrix.
+      # Order of rows and columns depends on Dataset#fields order
+
+      def covariance_matrix(ds)
+        ds.collect_matrix do |row,col|
+          if (ds[row].type!=:scale or ds[col].type!=:scale)
+            nil
+          else
+            covariance(ds[row],ds[col])
+          end
+        end
+      end
+
+      # Correlation matrix.
+      # Order of rows and columns depends on Dataset#fields order
+
+      def correlation_matrix(ds)
+        ds.collect_matrix do |row,col|
+          if row==col
+            1.0
+          elsif (ds[row].type!=:scale or ds[col].type!=:scale)
+            nil
+          else
+            pearson(ds[row],ds[col])
+          end
+        end
+      end
+      # Retrieves the n valid pairwise
+      def n_valid_matrix(ds)
+        ds.collect_matrix do |row,col|
+          if row==col
+            ds[row].valid_data.size
+          else
+            rowa,rowb=Statsample.only_valid(ds[row],ds[col])
+            rowa.size
+          end
+        end
+      end
+      # Matrix of correlation probability
+      # Order of rows and columns depends on Dataset#fields order
+
+      def correlation_probability_matrix(ds, tails=:both)
+        rows=ds.fields.collect do |row|
+          ds.fields.collect do |col|
+            v1a,v2a=Statsample.only_valid(ds[row],ds[col])
+            (row==col or ds[row].type!=:scale or ds[col].type!=:scale) ? nil : prop_pearson(t_pearson(ds[row],ds[col]), v1a.size, tails)
+          end
         end
+        Matrix.rows(rows)
+      end
+      # Spearman ranked correlation coefficient between 2 vectors
+      def spearman(v1,v2)
+        v1a,v2a=Statsample.only_valid(v1,v2)
+        v1r,v2r=v1a.ranked(:scale),v2a.ranked(:scale)
+        pearson(v1r,v2r)
+      end
+      # Calculate Point biserial correlation. Equal to Pearson correlation, with
+      # one dichotomous value replaced by "0" and the other by "1"
+      def point_biserial(dichotomous,continous)
+        ds={'d'=>dichotomous,'c'=>continous}.to_dataset.dup_only_valid
+        raise(TypeError, "First vector should be dichotomous") if ds['d'].factors.size!=2
+        raise(TypeError, "Second vector should be continous") if ds['c'].type!=:scale
+        f0=ds['d'].factors.sort[0]
+        m0=ds.filter_field('c') {|c| c['d']==f0}
+        m1=ds.filter_field('c') {|c| c['d']!=f0}
+        ((m1.mean-m0.mean).to_f / ds['c'].sdp) * Math::sqrt(m0.size*m1.size.to_f / ds.cases**2)
+      end
+      # Kendall Rank Correlation Coefficient.
+      #
+      # Based on Hervé Adbi article
+      def tau_a(v1,v2)
+        v1a,v2a=Statsample.only_valid(v1,v2)
+        n=v1.size
+        v1r,v2r=v1a.ranked(:scale),v2a.ranked(:scale)
+        o1=ordered_pairs(v1r)
+        o2=ordered_pairs(v2r)
+        delta= o1.size*2-(o2 & o1).size*2
+        1-(delta * 2 / (n*(n-1)).to_f)
+      end
+      # Calculates Tau b correlation.
+      #
+      # Tau-b defines perfect association as strict monotonicity. Although it
+      # requires strict monotonicity to reach 1.0, it does not penalize ties as
+      # much as some other measures.
+      #
+      # Source: http://faculty.chass.ncsu.edu/garson/PA765/assocordinal.htm
+      def tau_b(matrix)
+        v=pairs(matrix)
+        ((v['P']-v['Q']).to_f / Math::sqrt((v['P']+v['Q']+v['Y'])*(v['P']+v['Q']+v['X'])).to_f)
+      end
+      # Calculates Goodman and Kruskal's gamma.
+      #
+      # Gamma is the surplus of concordant pairs over discordant pairs, as a
+      # percentage of all pairs ignoring ties.
+      #
+      # Source: http://faculty.chass.ncsu.edu/garson/PA765/assocordinal.htm
+      def gamma(matrix)
+        v=pairs(matrix)
+        (v['P']-v['Q']).to_f / (v['P']+v['Q']).to_f
+      end
+      # Calculate indexes for a matrix the rows and cols has to be ordered
+      def pairs(matrix)
+        # calculate concordant #p matrix
+        rs=matrix.row_size
+        cs=matrix.column_size
+        conc=disc=ties_x=ties_y=0
+        (0...(rs-1)).each {|x|
+          (0...(cs-1)).each{|y|
+            ((x+1)...rs).each{|x2|
+              ((y+1)...cs).each{|y2|
+                # #p sprintf("%d:%d,%d:%d",x,y,x2,y2)
+                conc+=matrix[x,y]*matrix[x2,y2]
+              }
+            }
+          }
+        }
+        (0...(rs-1)).each {|x|
+          (1...(cs)).each{|y|
+            ((x+1)...rs).each{|x2|
+              (0...y).each{|y2|
+                # #p sprintf("%d:%d,%d:%d",x,y,x2,y2)
+                disc+=matrix[x,y]*matrix[x2,y2]
+              }
+            }
+          }
+        }
+        (0...(rs-1)).each {|x|
+          (0...(cs)).each{|y|
+            ((x+1)...(rs)).each{|x2|
+              ties_x+=matrix[x,y]*matrix[x2,y]
+            }
+          }
+        }
+        (0...rs).each {|x|
+          (0...(cs-1)).each{|y|
+            ((y+1)...(cs)).each{|y2|
+              ties_y+=matrix[x,y]*matrix[x,y2]
+            }
+          }
+        }
+        {'P'=>conc,'Q'=>disc,'Y'=>ties_y,'X'=>ties_x}
+      end
+      def ordered_pairs(vector)
+        d=vector.data
+        a=[]
+        (0...(d.size-1)).each{|i|
+          ((i+1)...(d.size)).each {|j|
+            a.push([d[i],d[j]])
+          }
+        }
+        a
+      end
+      def sum_of_codeviated(v1,v2)
+        v1a,v2a=Statsample.only_valid(v1,v2)
+        sum=0
+        (0...v1a.size).each{|i|
+          sum+=v1a[i]*v2a[i]
+        }
+        sum-((v1a.sum*v2a.sum) / v1a.size.to_f)
+      end
     end
+  end
 end
 
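The bivariate.rb rewrite keeps the correlation helpers as module methods of Statsample::Bivariate (now defined under class << self) and pulls in the new tetrachoric correlation file. A minimal sketch of the Pearson/Spearman path, assuming the to_scale conversion shown in the anova example also applies to plain arrays here:

  require 'statsample'

  v1 = [1,2,3,4,5,6].to_scale
  v2 = [2,3,3,6,5,7].to_scale

  r = Statsample::Bivariate.pearson(v1, v2)               # product-moment correlation
  t = Statsample::Bivariate.t_pearson(v1, v2)             # t statistic for H0: r = 0
  p Statsample::Bivariate.prop_pearson(t, v1.size, :both) # two-tailed probability
  p Statsample::Bivariate.spearman(v1, v2)                # rank correlation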