RubyGems - statsample - Versions diffs - 0.4.0 → 0.4.1 - Mend

statsample 0.4.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

data/History.txt +5 -0
data/lib/distribution.rb +8 -0
data/lib/distribution/chisquare.rb +1 -2
data/lib/statsample.rb +6 -6
data/lib/statsample/dominanceanalysis.rb +1 -1
data/lib/statsample/dominanceanalysis/bootstrap.rb +1 -1
data/lib/statsample/graph/svggraph.rb +2 -2
data/lib/statsample/mle.rb +2 -2
data/lib/statsample/regression/binomial.rb +27 -1
data/lib/statsample/vector.rb +53 -35
data/test/test_combination.rb +2 -2
data/test/test_distribution.rb +1 -1
metadata +2 -2

data/History.txt CHANGED Viewed

@@ -1,3 +1,8 @@
+=== 0.4.1 / 2009-09-12
+* More methods and usage documentation
+* Logit tests
+* Bug fix: rescue clauses around requires now specify LoadError
+* Binomial::BaseEngine new methods: coeffs_se, coeffs, constant and constant_se
 === 0.4.0 / 2009-09-10
 * New Distribution module, based on statistics2.rb by Shin-ichiro HARA. Replaces all instances of GSL distributions pdf and cdf calculations for native calculation.
 * New Maximum Likehood Estimation for Logit, Probit and Normal Distribution using Von Tessin(2005) algorithm. See MLE class and subclasses for more information.

data/lib/distribution.rb CHANGED Viewed

@@ -1,4 +1,12 @@
 require 'statistics2'
+# Several distributions modules to calculate cdf, inverse cdf and pdf
+# See Distribution::Pdf for interface.
+#
+# Usage:
+#    Distribution::Normal.cdf(1.96)
+#    => 0.97500210485178
+#    Distribution::Normal.p_value(0.95)
+#    => 1.64485364660836
 module Distribution
     autoload(:ChiSquare, 'distribution/chisquare')
     autoload(:T, 'distribution/t')

data/lib/distribution/chisquare.rb CHANGED Viewed

@@ -1,8 +1,7 @@
 module Distribution
     # Calculate cdf and inverse cdf for Chi Square Distribution.
     #
-    # Based on Babatunde, Iyiola & Eni () :
-    # "A Numerical Procedure for Computing Chi-Square Percentage Points"
+    # Based on Statistics2 module
     #
     module ChiSquare
         class << self

data/lib/statsample.rb CHANGED Viewed

@@ -48,13 +48,13 @@ end
 begin
     require 'gettext'
     rescue LoadError
-    def bindtextdomain(d)
+    def bindtextdomain(d) #:nodoc:
         d
     end
         # Bored module
-        module GetText
-            def _(t)
+        module GetText  #:nodoc:
+            def _(t)
                 t
             end
         end
@@ -81,7 +81,7 @@ end
 #
 module Statsample
-    VERSION = '0.4.0'
+    VERSION = '0.4.1'
     SPLIT_TOKEN = ","
 	autoload(:Database, 'statsample/converters')
     autoload(:Anova, 'statsample/anova')
@@ -90,9 +90,9 @@ module Statsample
 	autoload(:PlainText, 'statsample/converters')
 	autoload(:Excel, 'statsample/converters')
 	autoload(:GGobi, 'statsample/converters')
-    autoload(:DominanceAnalysis, 'statsample/dominanceanalysis')
+        autoload(:DominanceAnalysis, 'statsample/dominanceanalysis')
 	autoload(:HtmlReport, 'statsample/htmlreport')
-    autoload(:Mx, 'statsample/converters')
+        autoload(:Mx, 'statsample/converters')
 	autoload(:Resample, 'statsample/resample')
 	autoload(:SRS, 'statsample/srs')
 	autoload(:Codification, 'statsample/codification')

data/lib/statsample/dominanceanalysis.rb CHANGED Viewed

@@ -193,7 +193,7 @@ module Statsample
             end
             g=general_averages
-                    t.add_horizontal_line
+	     t.add_horizontal_line
             row=[_("Overall averages"),"",""]+@fields.collect{|f|
                         sprintf("%0.3f",g[f])

data/lib/statsample/dominanceanalysis/bootstrap.rb CHANGED Viewed

@@ -69,7 +69,7 @@ class DominanceAnalysis
             out.extend report_type
             out.add _("Summary for Bootstrap Dominance Analysis of %s on %s\n") % [@fields.join(", "), @y_var]
             out.add _("Sample size: %d\n") % @n_samples
-            t=Distribution::T.p_value(1-((1-alfa) / 2),@n_samples - 1)
+            t=Distribution::T.p_value(1-((1-alfa) / 2), @n_samples - 1)
                 out.add "t:#{t}\n"
             out.add "Linear Regression Engine: #{@lr_class.name}"
             out.nl

data/lib/statsample/graph/svggraph.rb CHANGED Viewed

@@ -89,8 +89,8 @@ end
 # replaces all key and fill classes with similar ones, without opacity
 # this allows rendering of svg and png on rox and gqview without problems
-module SVG
-	module Graph
+module SVG #:nodoc:
+	module Graph
 		class BarNoOp < Bar
 			def get_css; SVG::Graph.get_css_standard; end
 		end

data/lib/statsample/mle.rb CHANGED Viewed

@@ -56,8 +56,8 @@ module Statsample
         end
         # Creates a zero matrix Mx1, with M=x.M
         def set_default_parameters(x)
-            fd=x.column_size.times.collect{|i| 0.0}
-            fd.push(0.1)    if self.is_a? Statsample::MLE::Normal
+            fd=[0.0]*x.column_size
+	    fd.push(0.1)    if self.is_a? Statsample::MLE::Normal
             parameters = Matrix.columns([fd])
         end

data/lib/statsample/regression/binomial.rb CHANGED Viewed

@@ -2,13 +2,39 @@
 module Statsample
     module Regression
         module Binomial
+            # Create a Logit model object.
+            # ds:: Dataset
+            # y::  Name of dependent vector
+            # Use
+            #   dataset=Statsample::CSV.read("data.csv")
+            #   y="y"
+            #   lr=Statsample::Regression::Binomial.logit(dataset,y)
+            #
             def self.logit(ds,y_var)
                 Logit.new(ds,y_var)
             end
+            # Create a Probit model object.
+            # ds:: Dataset
+            # y::  Name of dependent vector
+            # Use
+            #   dataset=Statsample::CSV.read("data.csv")
+            #   y="y"
+            #   lr=Statsample::Regression::Binomial.probit(dataset,y)
+            #
             def self.probit(ds,y_var)
                 Probit.new(ds,y_var)
             end
+            # Base Engine for binomial regression analysis.
+            # See Statsample::Regression::Binomial.logit() and
+            # Statsample::Regression::Binomial.probit for fast
+            # access methods.
+            #
+            # Use:
+            #   dataset=Statsample::CSV.read("data.csv")
+            #   y="y"
+            #   model=Statsample::MLE::Logit.new
+            #   lr=Statsample::Regression::Binomial::BaseEngine.new(dataset, y, model)
          class BaseEngine
              attr_reader :log_likehood, :iterations
             def initialize(ds,y_var,model)

data/lib/statsample/vector.rb CHANGED Viewed

@@ -6,10 +6,10 @@ end
 module Statsample
     class << self
-	# Create a matrix using vectors as columns
+	# Create a matrix using vectors as columns.
     # Use:
     #
-    # matrix=Statsample.vector_cols_matrix(v1,v2)
+    #   matrix=Statsample.vector_cols_matrix(v1,v2)
 	def vector_cols_matrix(*vs)
 		# test
 		size=vs[0].size
@@ -23,7 +23,7 @@ module Statsample
 	end
 	end
 	# Returns a duplicate of the input vectors, without missing data
-	# for any of the vectors
+	# for any of the vectors.
 	#
 	#  a=[1,2,3,6,7,nil,3,5].to_vector(:scale)
 	#  b=[nil,nil,5,6,4,5,10,2].to_vector(:scale)
@@ -89,8 +89,8 @@ class Vector
         def dup
             Vector.new(@data.dup,@type,@missing_values.dup,@labels.dup)
         end
-        # Returns an empty duplicate of the vector. Maintains the type, missing
-        # values, labels
+        # Returns an empty duplicate of the vector. Maintains the type,
+        # missing values and labels.
         def dup_empty
             Vector.new([],@type,@missing_values.dup,@labels.dup)
         end
@@ -123,7 +123,7 @@ class Vector
         alias_method :standarized, :vector_standarized
-        def box_cox_transformation(lambda)
+        def box_cox_transformation(lambda) # :nodoc:
             raise "Should be a scale" unless @type==:scale
             @data_with_nils.collect{|x|
             if !x.nil?
@@ -162,21 +162,20 @@ class Vector
         end
         # Modifies current vector, with data modified by block.
         # Equivalent to #collect! on @data
         def recode!
             @data.collect!{|x|
                 yield x
             }
             set_valid_data
         end
-        # Iterate on each item
+        # Iterate on each item.
         # Equivalent to
         #   @data.each{|x| yield x}
         def each
             @data.each{|x| yield(x) }
         end
-        # Iterate on each item_index
+        # Iterate on each item, retrieving index
         def each_index
             (0...@data.size).each {|i|
@@ -185,16 +184,27 @@ class Vector
         end
         # Add a value at the end of the vector.
         # If second argument set to false, you should update the Vector usign
-        # Vector#set_valid_data at the end of your insertion cycle
+        # Vector.set_valid_data at the end of your insertion cycle
         #
         def add(v,update_valid=true)
             @data.push(v)
             set_valid_data if update_valid
         end
         # Update valid_data, missing_data, data_with_nils and gsl
-        # at the end of an insertion
+        # at the end of an insertion.
         #
-        # Use after add(v,false)
+        # Use after Vector.add(v,false)
+        # Usage:
+        #   v=Statsample::Vector.new
+        #   v.add(2,false)
+        #   v.add(4,false)
+        #   v.data
+        #   => [2,4]
+        #   v.valid_data
+        #   => []
+        #   v.set_valid_data
+        #   v.valid_data
+        #   => [2,4]
         def set_valid_data
 			@valid_data.clear
 			@missing_data.clear
@@ -203,17 +213,17 @@ class Vector
             set_valid_data_intern
             set_scale_data if(@type==:scale)
 		end
         if Statsample::STATSAMPLE__.respond_to?(:set_valid_data_intern)
-            def set_valid_data_intern
+            def set_valid_data_intern #:nodoc:
                 Statsample::STATSAMPLE__.set_valid_data_intern(self)
             end
         else
-            def set_valid_data_intern
+            def set_valid_data_intern #:nodoc:
                 _set_valid_data_intern
             end
         end
-        def _set_valid_data_intern
+        def _set_valid_data_intern #:nodoc:
             @data.each do |n|
                 if is_valid? n
                     @valid_data.push(n)
@@ -225,15 +235,17 @@ class Vector
             end
             @has_missing_data=@missing_data.size>0
         end
         # Retrieves true if data has one o more missing values
         def has_missing_data?
             @has_missing_data
         end
+        # Retrieves label for value x. Retrieves x if
+        # no label defined.
         def labeling(x)
             @labels.has_key?(x) ? @labels[x].to_s : x.to_s
         end
-        # Returns a Vector with the data with labels replaced by the label.
+        # Returns a Vector with data with labels replaced by the label.
         def vector_labeled
             d=@data.collect{|x|
                 if @labels.has_key? x
@@ -273,11 +285,11 @@ class Vector
 			@type=t
             set_scale_data if(t==:scale)
 		end
         def to_a
             @data.dup
         end
         alias_method :to_ary, :to_a
         # Vector sum.
         # - If v is a scalar, add this value to all elements
         # - If v is a Array or a Vector, should be of the same size of this vector
@@ -296,8 +308,8 @@ class Vector
         def -(v)
             _vector_ari("-",v)
         end
-        # Reports all values that doesn't comply with a condition
-        # Returns a hash with the index of data and the invalid data
+        # Reports all values that doesn't comply with a condition.
+        # Returns a hash with the index of data and the invalid data.
         def verify
             h={}
             (0...@data.size).to_a.each{|i|
@@ -401,7 +413,7 @@ class Vector
         # only with valid data.
         #
         # In all the trails, every item have the same probability
-        # of been selected
+        # of been selected.
 		def sample_with_replacement(sample=1)
             if(@type!=:scale or !HAS_GSL)
                 vds=@valid_data.size
@@ -414,8 +426,9 @@ class Vector
         # Returns an random sample of size n, without replacement,
         # only with valid data.
         #
-        # Every element could only be selected once
-        # A sample of the same size of the vector is the vector itself
+        # Every element could only be selected once.
+        #
+        # A sample of the same size of the vector is the vector itself.
         def sample_without_replacement(sample=1)
             if(@type!=:scale or !HAS_GSL)
@@ -432,6 +445,11 @@ class Vector
                 r.choose(@gsl, sample).to_a
             end
          end
+         # Retrieves the number of cases which comply with a condition.
+         # If block given, retrieves number of instances where
+         # block returns true.
+         # If other values given, retrieves the frequency for
+         # this value.
         def count(x=false)
             if block_given?
                 r=@data.inject(0) {|s, i|
@@ -443,7 +461,8 @@ class Vector
                 frequencies[x].nil? ? 0 : frequencies[x]
             end
         end
-        # returns the database type for the vector, according to its content
+        # Returns the database type for the vector, according to its content
         def db_type(dbs='mysql')
             # first, detect any character not number
@@ -465,11 +484,12 @@ class Vector
                 true
             end
         end
         def to_s
             sprintf("Vector(type:%s, n:%d)[%s]",@type.to_s,@data.size, @data.collect{|d| d.nil? ? "nil":d}.join(","))
         end
         # Ugly name. Really, create a Vector for standard 'matrix' package.
-        # <tt>dir</tt> could. be :horizontal or :vertical
+        # <tt>dir</tt> could be :horizontal or :vertical
         def to_matrix(dir=:horizontal)
             case dir
             when :horizontal
@@ -481,9 +501,7 @@ class Vector
 		def inspect
 			self.to_s
 		end
-        def as_r
-            @data.dup
-        end
+        # Retrieves unique values for data.
         def factors
             if @type==:scale
                 @scale_data.uniq.sort
@@ -492,17 +510,17 @@ class Vector
             end
         end
         if Statsample::STATSAMPLE__.respond_to?(:frequencies)
-            # Returns a hash with the distribution of frecuencies of
+            # Returns a hash with the distribution of frecuencies for
             # the sample
             def frequencies
                 Statsample::STATSAMPLE__.frequencies(@valid_data)
             end
         else
-            def frequencies
+            def frequencies #:nodoc:
                 _frequencies
             end
 		end
-        def _frequencies
+        def _frequencies #:nodoc:
             @valid_data.inject(Hash.new) {|a,x|
                 a[x]||=0
                 a[x]=a[x]+1
@@ -589,7 +607,8 @@ class Vector
             end
             def proportion_confidence_interval_z(n_poblation,margin=0.95,v=1)
                 Statsample::proportion_confidence_interval_z(proportion(v), @valid_data.size, n_poblation, margin)
-            end
+            end
 		self.instance_methods.find_all{|met| met=~/_slow$/}.each{|met|
 			met_or=met.gsub("_slow","")
 			if !self.method_defined?(met_or)
@@ -672,8 +691,7 @@ class Vector
             # The arithmetical mean of data
 			def mean
                 check_type :scale
-					sum.to_f.quo(n_valid)
+                sum.to_f.quo(n_valid)
 			end
             # Sum of squares for the data around a value.
             # By default, this value is the  mean

data/test/test_combination.rb CHANGED Viewed

@@ -31,8 +31,8 @@ class StatsampleCombinationTestCase < Test::Unit::TestCase
         rb.each{|y|
             rb_array.push(y)
         }
-        assert(gsl.d.is_a? Statsample::Combination::CombinationGsl)
-        assert(rb.d.is_a?  Statsample::Combination::CombinationRuby)
+        assert(gsl.d.is_a?(Statsample::Combination::CombinationGsl))
+        assert(rb.d.is_a?(Statsample::Combination::CombinationRuby))
         assert_equal(rb_array,gsl_array)
     else

data/test/test_distribution.rb CHANGED Viewed

@@ -4,7 +4,7 @@ require 'test/unit'
 begin
     require 'rbgsl'
     NOT_GSL=false
-rescue
+rescue LoadError
     NOT_GSL=true
 end
 class DistributionTestCase < Test::Unit::TestCase

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: statsample
 version: !ruby/object:Gem::Version
-  version: 0.4.0
+  version: 0.4.1
 platform: ruby
 authors:
 - Claudio Bustos
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2009-09-10 00:00:00 -04:00
+date: 2009-09-12 00:00:00 -04:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency