RubyGems - statsample - Versions diffs - 0.12.0 → 0.13.0 - Mend

statsample 0.12.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

data.tar.gz.sig +2 -1
data/History.txt +11 -0
data/Manifest.txt +2 -3
data/README.txt +0 -17
data/Rakefile +10 -9
data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
data/examples/principal_axis.rb +2 -0
data/examples/u_test.rb +8 -0
data/lib/distribution.rb +1 -1
data/lib/statsample.rb +12 -12
data/lib/statsample/anova/oneway.rb +4 -4
data/lib/statsample/bivariate.rb +10 -3
data/lib/statsample/bivariate/pearson.rb +55 -0
data/lib/statsample/dataset.rb +57 -49
data/lib/statsample/dominanceanalysis.rb +1 -2
data/lib/statsample/dominanceanalysis/bootstrap.rb +46 -54
data/lib/statsample/factor.rb +0 -1
data/lib/statsample/factor/parallelanalysis.rb +9 -13
data/lib/statsample/factor/pca.rb +5 -10
data/lib/statsample/factor/principalaxis.rb +27 -33
data/lib/statsample/matrix.rb +11 -11
data/lib/statsample/mle.rb +0 -1
data/lib/statsample/regression.rb +0 -1
data/lib/statsample/reliability.rb +2 -2
data/lib/statsample/reliability/multiscaleanalysis.rb +62 -15
data/lib/statsample/reliability/scaleanalysis.rb +5 -6
data/lib/statsample/test/f.rb +2 -5
data/lib/statsample/test/levene.rb +2 -5
data/lib/statsample/test/t.rb +4 -13
data/lib/statsample/test/umannwhitney.rb +19 -19
data/po/es/statsample.mo +0 -0
data/po/es/statsample.po +304 -111
data/po/statsample.pot +224 -90
data/test/test_bivariate.rb +8 -69
data/test/test_reliability.rb +3 -4
metadata +30 -18
metadata.gz.sig +0 -0
data/lib/statsample/bivariate/polychoric.rb +0 -893
data/lib/statsample/bivariate/tetrachoric.rb +0 -457
data/test/test_bivariate_polychoric.rb +0 -70

data.tar.gz.sig CHANGED Viewed

@@ -1 +1,2 @@
-�HG��@��^��uH�
+�2�����׌��Ab���Է����;P�
+��m��Iƚ��Xr�(ηV����:Pd�Y�����L���ϡ���-R���'��;�2l�n'40XH���!�(�l=�,�gX�|�N���{L�--�sWtr/b��^L�-tB?�I%�H�o�fk#HI��uc�V��c�0 ��T���x��)����v�(�f<0$�Zev��S��^�t*F̞�@�U�2d���8���(v��JUs�Q6�Ǐ/�#���S�;

data/History.txt CHANGED Viewed

@@ -1,3 +1,14 @@
+=== 0.13.0 / 2010-06-13
+* Polychoric and Tetrachoric moved to gem statsample-bivariate-extension
+* All remaining classes with a summary method now include Summarizable. Every method that returns a localizable string now passes it through _()
+* Correct implementation of Reliability::MultiScaleAnalysis.
+* Spanish translation for Mann-Whitney's U
+* Added example for Mann-Whitney's U test
+* Better summary for Mann-Whitney's U Test
+* Added Statsample::Bivariate::Pearson class to retrieve complete analysis for r correlations
+* Bug fix on DominanceAnalysis::Bootstrap
 === 0.12.0 / 2010-06-09
 * Modified Rakefile to remove dependencies based on C extensions. These are moved to statsample-optimization

data/Manifest.txt CHANGED Viewed

@@ -25,6 +25,7 @@ examples/principal_axis.rb
 examples/reliability.rb
 examples/t_test.rb
 examples/tetrachoric.rb
+examples/u_test.rb
 examples/vector.rb
 lib/distribution.rb
 lib/distribution/chisquare.rb
@@ -39,8 +40,7 @@ lib/statsample/anova.rb
 lib/statsample/anova/oneway.rb
 lib/statsample/anova/twoway.rb
 lib/statsample/bivariate.rb
-lib/statsample/bivariate/polychoric.rb
-lib/statsample/bivariate/tetrachoric.rb
+lib/statsample/bivariate/pearson.rb
 lib/statsample/codification.rb
 lib/statsample/combination.rb
 lib/statsample/converter/csv.rb
@@ -101,7 +101,6 @@ test/test_anovatwoway.rb
 test/test_anovatwowaywithdataset.rb
 test/test_anovawithvectors.rb
 test/test_bivariate.rb
-test/test_bivariate_polychoric.rb
 test/test_codification.rb
 test/test_combination.rb
 test/test_crosstab.rb

data/README.txt CHANGED Viewed

@@ -76,23 +76,6 @@ Include:
     cm=Statsample::Bivariate.correlation_matrix(ds)
     puts cm.summary
-=== Tetrachoric correlation
-    require 'statsample'
-    a=40
-    b=10
-    c=20
-    d=30
-    tetra=Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
-    puts tetra.summary
-=== Polychoric correlation
-    require 'statsample'
-    ct=Matrix[[58,52,1],[26,58,3],[8,12,9]]
-    poly=Statsample::Bivariate::Polychoric.new(ct)
-    puts poly.summary
 == REQUIREMENTS:

data/Rakefile CHANGED Viewed

@@ -23,13 +23,13 @@ task :release do
 system %{git push origin master}
 end
 desc "Update pot/po files."
-task :updatepo do
+task "gettext:updatepo" do
   require 'gettext/tools'
   GetText.update_pofiles("statsample", Dir.glob("{lib,bin}/**/*.{rb,rhtml}"), "statsample #{Statsample::VERSION}")
 end
 desc "Create mo-files"
-task :makemo do
+task "gettext:makemo" do
   require 'gettext/tools'
   GetText.create_mofiles()
   # GetText.create_mofiles(true, "po", "locale")  # This is for "Ruby on Rails".
@@ -40,7 +40,8 @@ h=Hoe.spec('statsample') do
   #self.testlib=:minitest
 	self.rubyforge_name = "ruby-statsample"
 	self.developer('Claudio Bustos', 'clbustos@gmail.com')
-	self.extra_deps << ["spreadsheet","~>0.6.0"] << ["svg-graph", "~>1.0"] << ["reportbuilder", "~>1.0"] << ["minimization", "~>0.2.0"] << ["fastercsv"] << ["dirty-memoize", "~>0.0"] << ["extendmatrix","~>0.2.0"]
+	self.extra_deps << ["spreadsheet","~>0.6.0"] << ["svg-graph", "~>1.0"] << ["reportbuilder", "~>1.0"] << ["minimization", "~>0.2.0"] << ["fastercsv"] << ["dirty-memoize", "~>0.0"] << ["extendmatrix","~>0.2.0"] << ["statsample-bivariate-extension", "~>0.13.0"]
 	self.extra_dev_deps << ["shoulda"]
   self.clean_globs << "test/images/*" << "demo/item_analysis/*" << "demo/Regression"
   self.post_install_message = <<-EOF
@@ -51,13 +52,13 @@ On *nix, you should install statsample-optimization
 to retrieve gems gsl, statistics2 and a C extension
 to speed some methods.
-  $sudo gem install statsample-optimization
+  $ sudo gem install statsample-optimization
-To use it, on Ubuntu I recommend install
-build-essential and libgsl0-dev using apt-get and
-compile ruby 1.8 or 1.9 from source code first.
+On Ubuntu, install  build-essential and libgsl0-dev
+using apt-get and  compile ruby 1.8 or 1.9 from
+source code first.
-  $sudo apt-get install build-essential libgsl0-dev
+  $ sudo apt-get install build-essential libgsl0-dev
 *****************************************************
@@ -90,7 +91,7 @@ Rake::RDocTask.new(:docs) do |rd|
 end
-desc 'publicar a rdocs con analytics'
+desc 'Publish rdocs with analytics support'
 task :publicar_docs => [:clean, :docs] do
   ruby %{agregar_adsense_a_doc.rb}
   path = File.expand_path("~/.rubyforge/user-config.yml")

data/data/locale/es/LC_MESSAGES/statsample.mo CHANGED Viewed

Binary file

data/examples/principal_axis.rb CHANGED Viewed

@@ -4,5 +4,7 @@ $:.unshift(File.dirname(__FILE__)+'/../lib/')
 require 'statsample'
 matrix=Matrix[
 [1.0, 0.709501601093587, 0.877596585880047, 0.272219316266807],  [0.709501601093587, 1.0, 0.291633797330304, 0.871141831433844], [0.877596585880047, 0.291633797330304, 1.0, -0.213373722977167], [0.272219316266807, 0.871141831433844, -0.213373722977167, 1.0]]
+matrix.extend Statsample::CovariateMatrix
+#matrix.fields=%w{a b c d}
 fa=Statsample::Factor::PrincipalAxis.new(matrix,:m=>1,:smc=>false)
 puts fa.summary

data/examples/u_test.rb ADDED Viewed

@@ -0,0 +1,8 @@
+#!/usr/bin/ruby
+$:.unshift(File.dirname(__FILE__)+'/../lib')
+require 'statsample'
+a=10.times.map {rand(100)}.to_scale
+b=20.times.map {(rand(20))**2+50}.to_scale
+u=Statsample::Test::UMannWhitney.new(a,b)
+puts u.summary

data/lib/distribution.rb CHANGED Viewed

@@ -1,7 +1,7 @@
 begin
     require 'statistics2'
 rescue LoadError
-        puts "You should install statistics2"
+    puts "You should install statistics2"
 end
 # Several distributions modules to calculate cdf, inverse cdf and pdf
 # See Distribution::Pdf for interface.

data/lib/statsample.rb CHANGED Viewed

@@ -31,9 +31,9 @@ end
 class String
   def is_number?
     if self =~ /^-?\d+[,.]?\d*(e-?\d+)?$/
-        true
+      true
     else
-        false
+      false
     end
   end
 end
@@ -112,7 +112,7 @@ module Statsample
       false
     end
   end
-  VERSION = '0.12.0'
+  VERSION = '0.13.0'
   SPLIT_TOKEN = ","
   autoload(:Database, 'statsample/converters')
   autoload(:Anova, 'statsample/anova')
@@ -201,19 +201,19 @@ module Statsample
-	module Util
+  module Util
     # Reference: http://www.itl.nist.gov/div898/handbook/eda/section3/normprpl.htm
     def normal_order_statistic_medians(i,n)
       if i==1
-          u= 1.0 - normal_order_statistic_medians(n,n)
+        u= 1.0 - normal_order_statistic_medians(n,n)
       elsif i==n
-          u=0.5**(1 / n.to_f)
+        u=0.5**(1 / n.to_f)
       else
-          u= (i - 0.3175) / (n + 0.365)
+        u= (i - 0.3175) / (n + 0.365)
       end
       u
     end
-	end
+  end
@@ -224,7 +224,7 @@ module Statsample
       fp.close
     end
   end
-  # Provides basic method to generate summaries
+  # Provides method summary to generate summaries and include GetText
   module Summarizable
     include GetText
     bindtextdomain("statsample")
@@ -242,12 +242,12 @@ end
 begin
   require 'statsamplert'
 rescue LoadError
-  module Statsample
-      OPTIMIZED=false
+  module Statsample
+    OPTIMIZED=false
   end
 end
 require 'statsample/vector'
 require 'statsample/dataset'
 require 'statsample/crosstab'
-require 'statsample/matrix'
+require 'statsample/matrix'

data/lib/statsample/anova/oneway.rb CHANGED Viewed

@@ -32,8 +32,8 @@ module Statsample
         @ss_total=@ss_num+@ss_den
         @ms_total=@ms_num+@ms_den
         opts_default={:name=>"ANOVA",
-                      :name_denominator=>"Explained variance",
-                      :name_numerator=>"Unexplained variance"}
+                      :name_denominator=>_("Explained variance"),
+                      :name_numerator=>_("Unexplained variance")}
         @opts=opts_default.merge(opts)
         opts_default.keys.each {|k|
           send("#{k}=", @opts[k])
@@ -89,8 +89,8 @@ module Statsample
         end
         opts||=Hash.new
         opts_default={:name=>_("Anova One-Way"),
-                      :name_numerator=>"Between Groups",
-                      :name_denominator=>"Within Groups",
+                      :name_numerator=>_("Between Groups"),
+                      :name_denominator=>_("Within Groups"),
                       :summary_descriptives=>false,
                       :summary_levene=>false}
         @opts=opts_default.merge(opts).merge(:ss_num=>ssbg, :ss_den=>sswg, :df_num=>df_bg, :df_den=>df_wg)

data/lib/statsample/bivariate.rb CHANGED Viewed

@@ -1,8 +1,15 @@
-require 'statsample/bivariate/tetrachoric'
-require 'statsample/bivariate/polychoric'
+require 'statsample/bivariate/pearson'
 module Statsample
-  # Diverse bivariate methods, including #covariance, #pearson correlation (r), #spearman ranked correlation (rho), #tetrachoric correlation and #polychoric correlation.
+  # Diverse methods and classes to calculate bivariate relations
+  # Specific classes:
+  # * Statsample::Bivariate::Pearson : Pearson correlation coefficient (r)
+  # * Statsample::Bivariate::Tetrachoric : Tetrachoric correlation
+  # * Statsample::Bivariate::Polychoric  : Polychoric correlation (using joint, two-step and polychoric series)
   module Bivariate
+    autoload(:Polychoric, "statsample/bivariate/polychoric")
+    autoload(:Tetrachoric, "statsample/bivariate/tetrachoric")
     class << self
       # Covariance between two vectors
       def covariance(v1,v2)

data/lib/statsample/bivariate/pearson.rb ADDED Viewed

@@ -0,0 +1,55 @@
+module Statsample
+  module Bivariate
+    # = Pearson correlation coefficient (r)
+    #
+    # The moment-product Pearson's correlation coefficient, known as 'r'
+    # is a measure of bivariate associate between two continous
+    # variables.
+    #
+    # == Usage
+    #   a = [1,2,3,4,5,6].to_scale
+    #   b = [2,3,4,5,6,7].to_scale
+    #   pearson = Statsample::Bivariate::Pearson.new(a,b)
+    #   puts pearson.r
+    #   puts pearson.t
+    #   puts pearson.probability
+    #
+    #   puts pearson.summary
+    #
+    class Pearson
+      include Statsample::Test
+      include Summarizable
+      # Name of correlation
+      attr_accessor :name
+      # Tails for probability (:both, :left or :right)
+      attr_accessor :tails
+      attr_accessor :n
+      def initialize(v1,v2,opts=Hash.new)
+        @v1_name,@v2_name = v1.name,v2.name
+        @v1,@v2           = Statsample.only_valid_clone(v1,v2)
+        @n=@v1.size
+        opts_default={
+          :name=>_("Correlation (%s - %s)") % [@v1_name, @v2_name],
+          :tails=>:both
+        }
+        @opts=opts.merge(opts_default)
+        @opts.each{|k,v|
+          self.send("#{k}=",v) if self.respond_to? k
+        }
+      end
+      def r
+        Statsample::Bivariate.pearson(@v1,@v2)
+      end
+      def t
+        Statsample::Bivariate.t_pearson(@v1,@v2)
+      end
+      def probability
+        p_using_cdf(Distribution::T.cdf(t, @v1.size-2), tails)
+      end
+      def report_building(builder)
+        builder.text(_("%s : r=%0.3f (t:%0.3f, g.l.=%d, p:%0.3f / %s tails)") % [@name, r,t, (n-2), probability, tails])
+      end
+    end
+  end
+end

data/lib/statsample/dataset.rb CHANGED Viewed

@@ -39,13 +39,15 @@ module Statsample
   #
   #
   # ==Usage
-  # Create a empty dataset
+  # Create a empty dataset:
   #   Dataset.new()
-  # Create a dataset with three empty vectors, called <tt>v1</tt>, <tt>v2</tt> and <tt>v3</tt>
+  # Create a dataset with three empty vectors, called <tt>v1</tt>, <tt>v2</tt> and <tt>v3</tt>:
   #   Dataset.new(%w{v1 v2 v3})
-  # Create a dataset with two vectors
+  # Create a dataset with two vectors, called <tt>v1</tt>
+  # and <tt>v2</tt>:
   #   Dataset.new({'v1'=>%w{1 2 3}.to_vector, 'v2'=>%w{4 5 6}.to_vector})
-  # Create a dataset with two given vectors (v1 and v2), with vectors on inverted order
+  # Create a dataset with two given vectors (v1 and v2),
+  # with vectors on inverted order:
   #   Dataset.new({'v2'=>v2,'v1'=>v1},['v2','v1'])
   #
   # The fast way to create a dataset uses Hash#to_dataset, with
@@ -59,7 +61,7 @@ module Statsample
     include Summarizable
     # Hash of Statsample::Vector
     attr_reader :vectors
-    # Ordered names of vectors
+    # Ordered ids of vectors
     attr_reader :fields
     # Name of dataset
     attr_accessor:name
@@ -67,7 +69,7 @@ module Statsample
     attr_reader :cases
     # Location of pointer on enumerations methods (like #each)
     attr_reader :i
     # Generates a new dataset, using three vectors
     # - Rows
     # - Columns
@@ -87,7 +89,8 @@ module Statsample
     #    b  1   0
     #
     # Useful to process outputs from databases
-    #
+    #
     def self.crosstab_by_asignation(rows,columns,values)
       raise "Three vectors should be equal size" if rows.size!=columns.size or rows.size!=values.size
       cols_values=columns.factors
@@ -123,7 +126,6 @@ module Statsample
     # [fields]  Array of names for vectors. It is only used to set the
     # order of variables. If empty, the vector keys, in alphabetical
     # order, are used as fields
     #
     def initialize(vectors={}, fields=[])
       @@n_dataset||=0
@@ -141,6 +143,10 @@ module Statsample
       end
       @i=nil
     end
+    #
+    # Returns a GSL::matrix
+    #
     def to_gsl_matrix
       matrix=GSL::Matrix.alloc(cases,@vectors.size)
       each_array do |row|
@@ -171,7 +177,7 @@ module Statsample
     end
     # Returns a duplicate of the Database
     # If fields given, only include those vectors.
-    # Every vector will be dup
+    # Every vector will be dup.
     def dup(*fields_to_include)
       if fields_to_include.size==1 and fields_to_include[0].is_a? Array
         fields_to_include=fields_to_include[0]
@@ -186,6 +192,10 @@ module Statsample
       }
       Dataset.new(vectors,fields)
     end
+    # Returns (when possible) a cheap copy of the dataset.
+    # If no vector has missing values, returns the original vectors.
+    # If missing values are present, uses Dataset.dup_only_valid
+    #
     def clone_only_valid(*fields_to_include)
       if fields_to_include.size==1 and fields_to_include[0].is_a? Array
         fields_to_include=fields_to_include[0]
@@ -240,7 +250,7 @@ module Statsample
       ds_new.update_valid_data
       ds_new
     end
-      # Returns a dataset with standarized data
+    # Returns a dataset with standarized data
     def standarize
       ds=dup()
       ds.fields.each do |f|
@@ -261,15 +271,18 @@ module Statsample
     def ==(d2)
       @vectors==d2.vectors and @fields==d2.fields
     end
+    # Returns vector <tt>c</tt>
     def col(c)
       @vectors[c]
     end
     alias_method :vector, :col
-    def add_vector(name,vector)
+    # Equal to Dataset[<tt>name</tt>]=<tt>vector</tt>
+    def add_vector(name, vector)
       raise ArgumentError, "Vector have different size" if vector.size!=@cases
       @vectors[name]=vector
       check_order
     end
+    # Returns true if dataset have vector <tt>v</tt>
     def has_vector? (v)
       return @vectors.has_key?(v)
     end
@@ -295,8 +308,8 @@ module Statsample
     # * Hash: keys equal to fields
     # If uvd is false, #update_valid_data is not executed after
     # inserting a case. This is very useful if you want to increase the
-    # performance on inserting many cases,
-    # because #update_valid_data performs check on vectors and on the dataset
+    # performance on inserting many cases,  because #update_valid_data
+    # performs check on vectors and on the dataset
     def add_case(v,uvd=true)
       case v
@@ -323,7 +336,7 @@ module Statsample
       @fields.each{|f| @vectors[f].set_valid_data}
       check_length
     end
-    # Delete a vector
+    # Delete vector named <tt>name</tt>.
     def delete_vector(name)
       @fields.delete(name)
       @vectors.delete(name)
@@ -345,26 +358,27 @@ module Statsample
         add_vector(name+join+k,v)
       }
     end
-		def vector_by_calculation(type=:scale)
-			a=[]
-			each {|row|
-				a.push(yield(row))
-			}
-			a.to_vector(type)
-		end
-		# Returns a vector with sumatory of fields
-		# if fields parameter is empty, sum all fields
-		def vector_sum(fields=nil)
-			a=[]
-			fields||=@fields
-			collect_with_index do |row, i|
-				if(fields.find{|f| !@vectors[f].data_with_nils[i]})
-					nil
-				else
-					fields.inject(0) {|ac,v| ac + row[v].to_f}
-				end
-      end
-		end
+    def vector_by_calculation(type=:scale)
+      a=[]
+      each do |row|
+        a.push(yield(row))
+      end
+      a.to_vector(type)
+    end
+    # Returns a vector with sumatory of fields
+    # if fields parameter is empty, sum all fields
+    def vector_sum(fields=nil)
+      a=[]
+      fields||=@fields
+      collect_with_index do |row, i|
+        if(fields.find{|f| !@vectors[f].data_with_nils[i]})
+          nil
+        else
+          fields.inject(0) {|ac,v| ac + row[v].to_f}
+        end
+      end
+    end
     # Check if #fields attribute is correct, after inserting or deleting vectors
     def check_fields(fields)
       fields||=@fields
@@ -614,14 +628,14 @@ module Statsample
       ds
     end
-		# creates a new vector with the data of a given field which the block returns true
-		def filter_field(field)
-			a=[]
-			each {|c|
-				a.push(c[field]) if yield c
-			}
-			a.to_vector(@vectors[field].type)
-		end
+    # creates a new vector with the data of a given field which the block returns true
+    def filter_field(field)
+      a=[]
+      each do |c|
+        a.push(c[field]) if yield c
+      end
+      a.to_vector(@vectors[field].type)
+    end
     def to_multiset_by_split_one_field(field)
       raise ArgumentError,"Should use a correct field name" if !@fields.include? field
@@ -802,8 +816,7 @@ module Statsample
       ds.update_valid_data
       ds
     end
-		def report_building(b)
+    def report_building(b)
       b.section(:name=>@name) do |g|
         g.text _"Cases: %d"  % cases
         @fields.each do |f|
@@ -811,11 +824,6 @@ module Statsample
           g.parse_element(@vectors[f])
         end
       end
-		end
-    def as_r
-      require 'rsruby/dataframe'
-      r=RSRuby.instance
     end
   end
 end