statsample 0.3.4 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +8 -0
- data/Manifest.txt +20 -2
- data/data/crime.txt +47 -0
- data/data/test_binomial.csv +201 -0
- data/demo/distribution_t.rb +2 -2
- data/demo/regression.rb +2 -1
- data/lib/distribution.rb +8 -0
- data/lib/distribution/chisquare.rb +24 -0
- data/lib/distribution/f.rb +25 -0
- data/lib/distribution/normal.rb +25 -0
- data/lib/distribution/t.rb +22 -0
- data/lib/matrix_extension.rb +78 -0
- data/lib/statistics2.rb +531 -0
- data/lib/statsample.rb +12 -9
- data/lib/statsample/anova.rb +1 -5
- data/lib/statsample/bivariate.rb +24 -20
- data/lib/statsample/combination.rb +14 -4
- data/lib/statsample/converters.rb +17 -1
- data/lib/statsample/dataset.rb +66 -10
- data/lib/statsample/dominanceanalysis/bootstrap.rb +1 -3
- data/lib/statsample/graph/gdchart.rb +2 -3
- data/lib/statsample/graph/svggraph.rb +8 -4
- data/lib/statsample/mle.rb +137 -0
- data/lib/statsample/mle/logit.rb +95 -0
- data/lib/statsample/mle/normal.rb +83 -0
- data/lib/statsample/mle/probit.rb +93 -0
- data/lib/statsample/regression.rb +3 -1
- data/lib/statsample/regression/binomial.rb +65 -0
- data/lib/statsample/regression/binomial/logit.rb +13 -0
- data/lib/statsample/regression/binomial/probit.rb +13 -0
- data/lib/statsample/regression/multiple.rb +61 -58
- data/lib/statsample/regression/multiple/rubyengine.rb +1 -1
- data/lib/statsample/srs.rb +5 -5
- data/lib/statsample/vector.rb +129 -59
- data/test/test_anova.rb +0 -5
- data/test/test_dataset.rb +13 -1
- data/test/test_distribution.rb +57 -0
- data/test/test_gsl.rb +22 -0
- data/test/test_logit.rb +22 -0
- data/test/test_mle.rb +140 -0
- data/test/test_r.rb +9 -0
- data/test/test_regression.rb +12 -4
- data/test/test_srs.rb +0 -4
- data/test/test_stata.rb +11 -0
- data/test/test_statistics.rb +0 -15
- data/test/test_vector.rb +11 -0
- metadata +28 -4
- data/lib/statsample/chidistribution.rb +0 -39
- data/lib/statsample/regression/logit.rb +0 -35
data/lib/statsample/regression/multiple.rb
CHANGED
@@ -1,6 +1,6 @@
 module Statsample
 module Regression
-# Module for Multiple Regression Analysis
+# Module for Linear Multiple Regression Analysis
 # You can call Regression::Multiple.listwise or Regression::Multiple.pairwise or instance directly the engines
 # Example.
 #
@@ -35,7 +35,21 @@ module Multiple
 def self.pairwise(ds,y_var)
 RubyEngine.new(ds,y_var)
 end
-
+def self.listwise_by_exp(ds,exp)
+end
+# Returns a dataset and name of criteria using a expression.
+# All nominal vectors are replaced by dummy coding
+# and interactions are calculated
+
+def self.ds_by_exp(ds,exp)
+raise "Not implemented"
+parts=exp.split(/[\+=]/)
+dependent=parts.pop
+ds_out=[]
+parts.each{|p|
+
+}
+end
 # Base class for Multiple Regression Engines
 class BaseEngine
 def initialize(ds,y_var)
@@ -119,11 +133,7 @@ module Multiple
 end
 # Significance of Fisher
 def significance
-
-GSL::Cdf.fdist_Q(f,df_r,df_e)
-else
-raise "Need Ruby/GSL"
-end
+1.0-Distribution::F.cdf(f,df_r,df_e)
 end
 # Tolerance for a given variable
 # http://talkstats.com/showthread.php?t=5056
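
Worth noting: the F-test p-value above no longer needs Ruby/GSL; it comes from the new pure-Ruby Distribution facade (lib/distribution/f.rb). A minimal sketch of the same computation, with made-up F and degrees of freedom:

    require 'statsample'   # loads the new Distribution module

    # Illustrative values only: an F statistic with 3 regression df and 96 error df.
    f    = 4.27
    df_r = 3
    df_e = 96

    # Same call the new #significance uses: P(F >= f) under the null hypothesis.
    p_value = 1.0 - Distribution::F.cdf(f, df_r, df_e)
    puts p_value
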
@@ -136,13 +146,13 @@ module Multiple
 1-lr.r2
 end
 # Tolerances for each coefficient
-
-
-
-
-
-
-
+def coeffs_tolerances
+@fields.inject({}) {|a,f|
+a[f]=tolerance(f);
+a
+}
+end
+# Standard Error for coefficients
 def coeffs_se
 out={}
 mse=sse.quo(df_e)
@@ -163,7 +173,6 @@ module Multiple
 x=Matrix.columns(columns)
 matrix=((x.t*x)).inverse * mse
 matrix.collect {|i|
-
 Math::sqrt(i) if i>0
 }
 end
@@ -177,10 +186,10 @@ module Multiple
 end
 # Retrieves a summary for Regression
 def summary(report_type=ConsoleSummary)
-
-
-
-out.add <<HEREDOC
+c=coeffs
+out=""
+out.extend report_type
+out.add <<HEREDOC
 Summary for regression of #{@fields.join(',')} over #{@y_var}
 *************************************************************
 Engine: #{self.class}
@@ -190,45 +199,39 @@ r2=#{sprintf("%0.3f",r2)}
 Equation=#{sprintf("%0.3f",constant)}+#{@fields.collect {|k| sprintf("%0.3f%s",c[k],k)}.join(' + ')}
 HEREDOC
 
-out.add_line
-out.add "ANOVA TABLE"
-
-t=Statsample::ReportTable.new(%w{source ss df ms f s})
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-a={}
-@fields.each_index {|i|
-a[@fields[i]]=c[i]
-}
-a
-end
+out.add_line
+out.add "ANOVA TABLE"
+
+t=Statsample::ReportTable.new(%w{source ss df ms f s})
+t.add_row(["Regression", sprintf("%0.3f",ssr), df_r, sprintf("%0.3f",msr), sprintf("%0.3f",f), sprintf("%0.3f", significance)])
+t.add_row(["Error", sprintf("%0.3f",sse), df_e, sprintf("%0.3f",mse)])
+
+t.add_row(["Total", sprintf("%0.3f",sst), df_r+df_e])
+
+out.parse_table(t)
+
+begin
+out.add "Beta coefficientes"
+sc=standarized_coeffs
+cse=coeffs_se
+t=Statsample::ReportTable.new(%w{coeff b beta se t})
+t.add_row(["Constant", sprintf("%0.3f", constant), "-", sprintf("%0.3f", constant_se), sprintf("%0.3f", constant_t)])
+@fields.each{|f|
+t.add_row([f, sprintf("%0.3f", c[f]), sprintf("%0.3f", sc[f]), sprintf("%0.3f", cse[f]), sprintf("%0.3f", c[f].quo(cse[f]))])
+}
+out.parse_table(t)
+
+rescue
+end
+out
+end
+def assign_names(c)
+a={}
+@fields.each_index {|i|
+a[@fields[i]]=c[i]
+}
+a
+end
 
 
 # Deprecated
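
With the engine, tolerance, standard-error and report changes above, a full run without rb-gsl looks roughly like this. The data and field names are invented, and Dataset construction from a hash of vectors is assumed (as in the bundled demo scripts):

    require 'statsample'

    a  = [1, 3, 2, 4, 3, 5].to_vector(:scale)
    b  = [2, 2, 3, 3, 4, 5].to_vector(:scale)
    y  = [4, 8, 7, 10, 11, 13].to_vector(:scale)
    ds = Statsample::Dataset.new({'a' => a, 'b' => b, 'y' => y})

    lr = Statsample::Regression::Multiple.listwise(ds, 'y')
    puts lr.summary   # equation, ANOVA table and beta coefficients, as built above
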
data/lib/statsample/srs.rb
CHANGED
@@ -26,7 +26,7 @@ module Statsample
 end
 # Sample size estimation for proportions, infinite poblation
 def estimation_n0(d,prop,margin=0.95)
-t=
+t=Distribution::Normal.p_value(1-(1-margin).quo(2))
 var=prop*(1-prop)
 t**2*var.quo(d**2)
 end
@@ -39,13 +39,13 @@ module Statsample
 # Uses estimated proportion, sample without replacement.
 
 def proportion_confidence_interval_t(prop, n_sample, n_population, margin=0.95)
-t=
+t = Distribution::T.p_value(1-((1-margin).quo(2)) , n_sample-1)
 proportion_confidence_interval(prop,n_sample,n_population, t)
 end
 # Proportion confidence interval with z values
 # Uses estimated proportion, sample without replacement.
 def proportion_confidence_interval_z(p, n_sample, n_population, margin=0.95)
-z=
+z=Distribution::Normal.p_value(1-((1-margin).quo(2)))
 proportion_confidence_interval(p,n_sample,n_population, z)
 end
 # Proportion confidence interval with x value
@@ -137,13 +137,13 @@ module Statsample
 # Confidence Interval using T-Student
 # Use with n < 60
 def mean_confidence_interval_t(mean,s,n_sample,n_population,margin=0.95)
-t=
+t=Distribution::T.p_value(1-((1-margin) / 2),n_sample-1)
 mean_confidence_interval(mean,s,n_sample,n_population,t)
 end
 # Confidente Interval using Z
 # Use with n > 60
 def mean_confidence_interval_z(mean,s,n_sample,n_population,margin=0.95)
-z=
+z=Distribution::Normal.p_value(1-((1-margin) / 2))
 mean_confidence_interval(mean,s,n_sample,n_population, z)
 end
 # Confidente interval using X.
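
Each helper now derives its critical value from the pure-Ruby Distribution module instead of GSL. The two quantile calls in isolation (the 95% margin and n=25 are arbitrary choices):

    require 'statsample'

    margin = 0.95
    # Two-tailed z quantile, as used by the *_z methods above (~1.96 for 95%)
    z = Distribution::Normal.p_value(1 - (1 - margin).quo(2))
    # Two-tailed t quantile for a sample of 25, as used by the *_t methods (~2.06)
    t = Distribution::T.p_value(1 - (1 - margin).quo(2), 25 - 1)
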
data/lib/statsample/vector.rb
CHANGED
@@ -42,20 +42,38 @@ module Statsample
 
 class Vector
 include Enumerable
-
+# Level of measurement. Could be :nominal, :ordinal or :scale
+attr_reader :type
+# Original data.
+attr_reader :data
+# Valid data. Equal to data, minus values assigned as missing values
+attr_reader :valid_data
+# Array of values considered as missing. Nil is a missing value, by default
+attr_reader :missing_values
+# Missing values array
+attr_reader :missing_data
+# Original data, with all missing values replaced by nils
+attr_reader :data_with_nils
+# GSL Object, only available with rbgsl extension and type==:scale
+attr_reader :gsl
+# Change label for specific values
 attr_accessor :labels
-
-
-
-
-
-
-
-
+# Creates a new Vector object.
+# [data] Array of data.
+# [type] Level of meausurement. See Vector#type
+# [missing_values] Array of missing values. See Vector#missing_values
+# [labels] Labels for data values
+#
+# The fast way to create a vector uses Array#to_vector. Remember
+# to include as the first argument the level of measurement
+#
+# v=[1,2,3,4].to_vector(:scale)
+#
+def initialize(data=[], t=:nominal,missing_values=[],labels={})
 raise "Data should be an array" unless data.is_a? Array
-
-
-
+@data=data
+@missing_values=missing_values
+@labels=labels
 @type=t
 @valid_data=[]
 @data_with_nils=[]
@@ -65,6 +83,9 @@ class Vector
 set_valid_data_intern
 self.type=t
 end
+# Creates a duplicate of the Vector.
+# Note: data, missing_values and labels are duplicated, so
+# changes on original vector doesn't propages to copies.
 def dup
 Vector.new(@data.dup,@type,@missing_values.dup,@labels.dup)
 end
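
A short sketch of the newly documented readers; the values are invented and the behaviour follows the comments above (nil always counts as missing, plus anything listed in missing_values):

    require 'statsample'

    v = Statsample::Vector.new([1, 2, 3, nil, 99], :scale, [99])
    v.type            # => :scale
    v.data            # => [1, 2, 3, nil, 99]
    v.valid_data      # => [1, 2, 3]
    v.data_with_nils  # => [1, 2, 3, nil, nil]
    v.missing_values  # => [99]

    # Shortcut mentioned in the constructor docs:
    w = [1, 2, 3, 4].to_vector(:scale)
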
@@ -73,14 +94,17 @@ class Vector
 def dup_empty
 Vector.new([],@type,@missing_values.dup,@labels.dup)
 end
+# Raises an exception if type of vector is inferior to t type
+def check_type(t)
+raise NoMethodError if (t==:scale and @type!=:scale) or (t==:ordinal and @type==:nominal)
+end
+private :check_type
+
 # Return a vector usign the standarized values for data
 # with sd with denominator N
 def vector_standarized_pop
 vector_standarized(true)
 end
-def check_type(t)
-raise NoMethodError if (t==:scale and @type!=:scale) or (t==:ordinal and @type==:nominal)
-end
 # Return a vector usign the standarized values for data
 # with sd with denominator n-1
 
@@ -114,48 +138,63 @@ class Vector
 }.to_vector(:scale)
 end
 
-# Vector equality
+# Vector equality.
 # Two vector will be the same if their data, missing values, type, labels are equals
 def ==(v2)
 raise TypeError,"Argument should be a Vector" unless v2.instance_of? Statsample::Vector
 @data==v2.data and @missing_values==v2.missing_values and @type==v2.type and @labels=v2.labels
 end
 
-def _dump(i)
+def _dump(i) # :nodoc:
 Marshal.dump({'data'=>@data,'missing_values'=>@missing_values, 'labels'=>@labels, 'type'=>@type})
 end
-
+
+def self._load(data) # :nodoc:
 h=Marshal.load(data)
 Vector.new(h['data'], h['type'], h['missing_values'], h['labels'])
 end
+# Returns a new vector, with data modified by block.
+# Equivalent to create a Vector after #collect on data
 def recode
 @data.collect{|x|
 yield x
 }.to_vector(@type)
 end
+# Modifies current vector, with data modified by block.
+# Equivalent to #collect! on @data
+
 def recode!
 @data.collect!{|x|
 yield x
 }
 set_valid_data
 end
+# Iterate on each item
+# Equivalent to
+# @data.each{|x| yield x}
 def each
-@data.each{|x|
-yield(x)
-}
+@data.each{|x| yield(x) }
 end
+
+# Iterate on each item_index
+
 def each_index
 (0...@data.size).each {|i|
 yield(i)
 }
 end
-# Add a value at the end of the vector
-# If second argument set to false, you should update
+# Add a value at the end of the vector.
+# If second argument set to false, you should update the Vector usign
 # Vector#set_valid_data at the end of your insertion cycle
+#
 def add(v,update_valid=true)
 @data.push(v)
 set_valid_data if update_valid
 end
+# Update valid_data, missing_data, data_with_nils and gsl
+# at the end of an insertion
+#
+# Use after add(v,false)
 def set_valid_data
 @valid_data.clear
 @missing_data.clear
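
The recode/recode! pair and the add/set_valid_data cycle documented above combine like this (a sketch; the loop size is only there to show why deferring the bookkeeping pays off):

    require 'statsample'

    v = [1, 2, 3].to_vector(:scale)
    doubled = v.recode { |x| x * 2 }   # new Vector [2, 4, 6]; v is untouched
    v.recode! { |x| x + 1 }            # v now holds [2, 3, 4]

    # Bulk insertion: skip per-push bookkeeping, rebuild once at the end.
    w = Statsample::Vector.new([], :scale)
    1000.times { |i| w.add(i, false) }
    w.set_valid_data                   # refreshes valid_data, missing_data, data_with_nils, gsl
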
@@ -186,6 +225,7 @@ class Vector
 end
 @has_missing_data=@missing_data.size>0
 end
+
 # Retrieves true if data has one o more missing values
 def has_missing_data?
 @has_missing_data
@@ -193,7 +233,7 @@ class Vector
 def labeling(x)
 @labels.has_key?(x) ? @labels[x].to_s : x.to_s
 end
-# Returns a Vector with the data with labels replaced by the label
+# Returns a Vector with the data with labels replaced by the label.
 def vector_labeled
 d=@data.collect{|x|
 if @labels.has_key? x
@@ -204,12 +244,18 @@ class Vector
 }
 Vector.new(d,@type)
 end
+# Size of total data
 def size
 @data.size
 end
+alias_method :n, :size
+
+# Retrieves i element of data
 def [](i)
 @data[i]
 end
+# Set i element of data.
+# Note: Use set_valid_data if you include missing values
 def []=(i,v)
 @data[i]=v
 end
@@ -227,7 +273,7 @@ class Vector
 @type=t
 set_scale_data if(t==:scale)
 end
-
+
 def to_a
 @data.dup
 end
@@ -292,10 +338,11 @@ class Vector
 end
 
 end
-# Return an array with the data splitted by a separator
-#
-#
-#
+# Return an array with the data splitted by a separator.
+# a=Vector.new(["a,b","c,d","a,b","d"])
+# a.splitted
+# =>
+# [["a","b"],["c","d"],["a","b"],["d"]]
 def splitted(sep=Statsample::SPLIT_TOKEN)
 @data.collect{|x|
 if x.nil?
@@ -311,11 +358,14 @@ class Vector
 # defined on the fields
 # Example:
 #
-#
+# a=Vector.new(["a,b","c,d","a,b"])
 # a.split_by_separator
-#
-#
-#
+# => {"a"=>#<Statsample::Type::Nominal:0x7f2dbcc09d88
+# @data=[1, 0, 1]>,
+# "b"=>#<Statsample::Type::Nominal:0x7f2dbcc09c48
+# @data=[1, 1, 0]>,
+# "c"=>#<Statsample::Type::Nominal:0x7f2dbcc09b08
+# @data=[0, 1, 1]>}
 #
 def split_by_separator(sep=Statsample::SPLIT_TOKEN)
 split_data=splitted(sep)
@@ -353,7 +403,7 @@ class Vector
 # In all the trails, every item have the same probability
 # of been selected
 def sample_with_replacement(sample=1)
-if(@type!=:scale)
+if(@type!=:scale or !HAS_GSL)
 vds=@valid_data.size
 (0...sample).collect{ @valid_data[rand(vds)] }
 else
@@ -368,7 +418,7 @@ class Vector
 # A sample of the same size of the vector is the vector itself
 
 def sample_without_replacement(sample=1)
-if(@type!=:scale)
+if(@type!=:scale or !HAS_GSL)
 raise ArgumentError, "Sample size couldn't be greater than n" if sample>@valid_data.size
 out=[]
 size=@valid_data.size
@@ -393,7 +443,8 @@ class Vector
 frequencies[x].nil? ? 0 : frequencies[x]
 end
 end
-# returns the
+# returns the database type for the vector, according to its content
+
 def db_type(dbs='mysql')
 # first, detect any character not number
 if @data.find {|v| v.to_s=~/\d{2,2}-\d{2,2}-\d{4,4}/} or @data.find {|v| v.to_s=~/\d{4,4}-\d{2,2}-\d{2,2}/}
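
With the extra guards, sampling no longer assumes rb-gsl: non-scale vectors, or any vector when HAS_GSL is false, take the plain-Ruby branch. A sketch (output varies, it is random):

    require 'statsample'

    v = %w{a b b c c c}.to_vector(:nominal)
    v.sample_with_replacement(3)     # e.g. ["c", "b", "c"]
    v.sample_without_replacement(2)  # two draws without reusing a position
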
@@ -416,15 +467,28 @@ class Vector
 end
 def to_s
 sprintf("Vector(type:%s, n:%d)[%s]",@type.to_s,@data.size, @data.collect{|d| d.nil? ? "nil":d}.join(","))
+end
+# Ugly name. Really, create a Vector for standard 'matrix' package.
+# <tt>dir</tt> could. be :horizontal or :vertical
+def to_matrix(dir=:horizontal)
+case dir
+when :horizontal
+Matrix[@data]
+when :vertical
+Matrix.columns([@data])
+end
 end
 def inspect
 self.to_s
 end
+def as_r
+@data.dup
+end
 def factors
 if @type==:scale
 @scale_data.uniq.sort
 else
-
+@valid_data.uniq.sort
 end
 end
 if Statsample::STATSAMPLE__.respond_to?(:frequencies)
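
The new to_matrix bridges a Vector to Ruby's standard matrix library; following the code added above:

    require 'statsample'
    require 'matrix'

    v = [1, 2, 3].to_vector(:scale)
    v.to_matrix              # => Matrix[[1, 2, 3]]      (one row, the default)
    v.to_matrix(:vertical)   # => Matrix[[1], [2], [3]]  (one column)
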
@@ -472,16 +536,16 @@ class Vector
 end
 
 
-# Returns the most frequent item
+# Returns the most frequent item.
 def mode
 frequencies.max{|a,b| a[1]<=>b[1]}[0]
 end
-# The numbers of item with valid data
+# The numbers of item with valid data.
 def n_valid
 @valid_data.size
 end
 # Returns a hash with the distribution of proportions of
-# the sample
+# the sample.
 def proportions
 frequencies.inject({}){|a,v|
 a[v[0]] = v[1].quo(n_valid)
@@ -512,13 +576,11 @@ class Vector
 out
 end
 
-
-
-
 # Variance of p, according to poblation size
 def variance_proportion(n_poblation, v=1)
 Statsample::proportion_variance_sample(self.proportion(v), @valid_data.size, n_poblation)
 end
+# Variance of p, according to poblation size
 def variance_total(n_poblation, v=1)
 Statsample::total_variance_sample(self.proportion(v), @valid_data.size, n_poblation)
 end
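
For a nominal vector the descriptive helpers touched above behave like this (a small sketch with invented data):

    require 'statsample'

    v = %w{a a b c c c}.to_vector(:nominal)
    v.mode         # => "c"   (most frequent value)
    v.n_valid      # => 6
    v.proportions  # hash of value => share of valid cases, e.g. "c" => 0.5
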
@@ -534,7 +596,10 @@ class Vector
 alias_method met_or, met
 end
 }
-
+######
+### Ordinal Methods
+######
+
 # Return the value of the percentil q
 def percentil(q)
 check_type :ordinal
@@ -546,7 +611,7 @@ class Vector
 (sorted[(v-0.5).to_i].to_f + sorted[(v+0.5).to_i]).quo(2)
 end
 end
-# Returns a ranked vector
+# Returns a ranked vector.
 def ranked(type=:ordinal)
 check_type :ordinal
 i=0
@@ -593,6 +658,8 @@ class Vector
 @gsl=GSL::Vector.alloc(@scale_data) if @scale_data.size>0
 end
 end
+private :set_scale_data
+
 # The range of the data (max - min)
 def range;
 check_type :scale
@@ -608,9 +675,12 @@ class Vector
 
 sum.to_f.quo(n_valid)
 end
+# Sum of squares for the data around a value.
+# By default, this value is the mean
+# ss= sum{(xi-m)^2}
+#
 def sum_of_squares(m=nil)
 check_type :scale
-
 m||=mean
 @scale_data.inject(0){|a,x| a+(x-m).square}
 end
@@ -618,27 +688,25 @@ class Vector
 # Sum of squared deviation
 def sum_of_squared_deviation
 check_type :scale
-
 @scale_data.inject(0) {|a,x| x.square+a} - (sum.square.quo(n_valid))
 end
 
-# Population variance (
+# Population variance (denominator N)
 def variance_population(m=nil)
 check_type :scale
-
 m||=mean
 squares=@scale_data.inject(0){|a,x| x.square+a}
 squares.quo(n_valid) - m.square
 end
 
 
-# Population Standard deviation (
+# Population Standard deviation (denominator N)
 def standard_deviation_population(m=nil)
 check_type :scale
 
 Math::sqrt( variance_population(m) )
 end
-# Sample Variance (
+# Sample Variance (denominator n-1)
 
 def variance_sample(m=nil)
 check_type :scale
@@ -647,7 +715,7 @@ class Vector
 sum_of_squares(m).quo(n_valid - 1)
 end
 
-# Sample Standard deviation (
+# Sample Standard deviation (denominator n-1)
 
 def standard_deviation_sample(m=nil)
 check_type :scale
@@ -655,13 +723,14 @@ class Vector
 m||=m
 Math::sqrt(variance_sample(m))
 end
+# Skewness of the sample
 def skew
 check_type :scale
-
 m=mean
 thirds=@scale_data.inject(0){|a,x| a+((x-mean)**3)}
 thirds.quo((@scale_data.size-1)*sd**3)
 end
+# Kurtosis of the sample
 def kurtosis
 check_type :scale
 
@@ -670,9 +739,10 @@ class Vector
 thirds.quo((@scale_data.size-1)*sd**4)
 
 end
+# Product of all values on the sample
+#
 def product
 check_type :scale
-
 @scale_data.inject(1){|a,x| a*x }
 end
 if HAS_GSL
@@ -712,11 +782,11 @@ class Vector
 m||=mean
 @gsl.sd_with_fixed_mean(m)
 end
-def skew
+def skew # :nodoc:
 check_type :scale
 @gsl.skew
 end
-def kurtosis
+def kurtosis # :nodoc:
 check_type :scale
 @gsl.kurtosis
 end
@@ -752,8 +822,8 @@ class Vector
 alias_method :sdp, :standard_deviation_population
 alias_method :sds, :standard_deviation_sample
 alias_method :cov, :coefficient_of_variation
-
-
-
+alias_method :variance, :variance_sample
+alias_method :sd, :standard_deviation_sample
+alias_method :ss, :sum_of_squares
 end
 end
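
Finally, the variance/sd/ss aliases let the scale statistics read naturally. A worked sketch using the formulas above (denominator n-1 for the sample versions, N for the population ones):

    require 'statsample'

    v = [1, 2, 3, 4, 5].to_vector(:scale)
    v.mean                  # => 3.0
    v.sum_of_squares        # => 10.0   (same as v.ss)
    v.variance              # => 2.5    alias for variance_sample: 10/(5-1)
    v.sd                    # => ~1.581 alias for standard_deviation_sample
    v.variance_population   # => 2.0    denominator N: 10/5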