RubyGems - statsample - Versions diffs - 0.6.5 → 0.6.7 - Mend

statsample 0.6.5 → 0.6.7

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (61)

data/History.txt +15 -0
data/Manifest.txt +6 -0
data/README.txt +30 -12
data/Rakefile +91 -0
data/demo/levene.rb +9 -0
data/demo/multiple_regression.rb +1 -7
data/demo/polychoric.rb +1 -0
data/demo/principal_axis.rb +8 -0
data/lib/distribution/f.rb +22 -22
data/lib/spss.rb +99 -99
data/lib/statsample/bivariate/polychoric.rb +32 -22
data/lib/statsample/bivariate/tetrachoric.rb +212 -207
data/lib/statsample/bivariate.rb +6 -6
data/lib/statsample/codification.rb +65 -65
data/lib/statsample/combination.rb +60 -59
data/lib/statsample/converter/csv19.rb +12 -12
data/lib/statsample/converters.rb +1 -1
data/lib/statsample/dataset.rb +93 -36
data/lib/statsample/dominanceanalysis/bootstrap.rb +66 -3
data/lib/statsample/dominanceanalysis.rb +5 -6
data/lib/statsample/factor/pca.rb +41 -11
data/lib/statsample/factor/principalaxis.rb +105 -29
data/lib/statsample/factor/rotation.rb +20 -3
data/lib/statsample/factor.rb +1 -1
data/lib/statsample/graph/gdchart.rb +13 -13
data/lib/statsample/graph/svggraph.rb +166 -167
data/lib/statsample/matrix.rb +22 -12
data/lib/statsample/mle/logit.rb +3 -2
data/lib/statsample/mle/probit.rb +7 -5
data/lib/statsample/mle.rb +4 -2
data/lib/statsample/multiset.rb +125 -124
data/lib/statsample/permutation.rb +2 -1
data/lib/statsample/regression/binomial/logit.rb +4 -3
data/lib/statsample/regression/binomial/probit.rb +2 -1
data/lib/statsample/regression/binomial.rb +62 -81
data/lib/statsample/regression/multiple/baseengine.rb +1 -1
data/lib/statsample/regression/multiple/gslengine.rb +1 -1
data/lib/statsample/regression/multiple/matrixengine.rb +12 -6
data/lib/statsample/regression/multiple.rb +15 -42
data/lib/statsample/regression/simple.rb +93 -78
data/lib/statsample/regression.rb +74 -2
data/lib/statsample/reliability.rb +117 -120
data/lib/statsample/srs.rb +156 -153
data/lib/statsample/test/levene.rb +90 -0
data/lib/statsample/test/umannwhitney.rb +25 -9
data/lib/statsample/test.rb +2 -0
data/lib/statsample/vector.rb +388 -413
data/lib/statsample.rb +74 -30
data/po/es/statsample.mo +0 -0
data/test/test_bivariate.rb +5 -4
data/test/test_combination.rb +1 -1
data/test/test_dataset.rb +2 -2
data/test/test_factor.rb +53 -6
data/test/test_gsl.rb +1 -1
data/test/test_mle.rb +1 -1
data/test/test_regression.rb +18 -33
data/test/test_statistics.rb +15 -33
data/test/test_stest.rb +35 -0
data/test/test_svg_graph.rb +2 -2
data/test/test_vector.rb +331 -333
metadata +38 -11

data/lib/statsample/codification.rb CHANGED Viewed

@@ -1,32 +1,32 @@
 require 'yaml'
 module Statsample
-# This module aids to code open questions
-# * Select one or more vectors of a dataset, to create a yaml files, on which each vector is a hash, which keys and values are the vector's factors . If data have Statsample::SPLIT_TOKEN on a value, each value will be separated on two or more hash keys.
-# * Edit the yaml and replace the values of hashes with your codes. If you need to create two or mores codes for an answer, use the separator (default Statsample::SPLIT_TOKEN)
-# * Recode the vectors, loading the yaml file:
-#   * recode_dataset_simple!() : The new vectors have the same name of the original plus "_recoded"
-#   * recode_dataset_split!() : Create equal number of vectors as values. See Vector.add_vectors_by_split() for arguments
-#
-# Usage:
-#   recode_file="recodification.yaml"
-#   phase=:first # flag
-#   if phase==:first
-#     File.open(recode_file,"w") {|fp|
-#       Statsample::Codification.create_yaml(ds,%w{vector1 vector2}, ",",fp)
-#     }
-#   # Edit the file recodification.yaml and verify changes
-#   elsif phase==:second
-#     File.open(recode_file,"r") {|fp|
-#       Statsample::Codification.verify(fp,['vector1'])
-#     }
-#   # Add new vectors to the dataset
-#   elsif phase==:third
-#     File.open(recode_file,"r") {|fp|
-#       Statsample::Codification.recode_dataset_split!(ds,fp,"*")
-#     }
-#   end
-#
+  # This module aids to code open questions
+  # * Select one or more vectors of a dataset, to create a yaml files, on which each vector is a hash, which keys and values are the vector's factors . If data have Statsample::SPLIT_TOKEN on a value, each value will be separated on two or more hash keys.
+  # * Edit the yaml and replace the values of hashes with your codes. If you need to create two or mores codes for an answer, use the separator (default Statsample::SPLIT_TOKEN)
+  # * Recode the vectors, loading the yaml file:
+  #   * recode_dataset_simple!() : The new vectors have the same name of the original plus "_recoded"
+  #   * recode_dataset_split!() : Create equal number of vectors as values. See Vector.add_vectors_by_split() for arguments
+  #
+  # Usage:
+  #   recode_file="recodification.yaml"
+  #   phase=:first # flag
+  #   if phase==:first
+  #     File.open(recode_file,"w") {|fp|
+  #       Statsample::Codification.create_yaml(ds,%w{vector1 vector2}, ",",fp)
+  #     }
+  #   # Edit the file recodification.yaml and verify changes
+  #   elsif phase==:second
+  #     File.open(recode_file,"r") {|fp|
+  #       Statsample::Codification.verify(fp,['vector1'])
+  #     }
+  #   # Add new vectors to the dataset
+  #   elsif phase==:third
+  #     File.open(recode_file,"r") {|fp|
+  #       Statsample::Codification.recode_dataset_split!(ds,fp,"*")
+  #     }
+  #   end
+  #
   module Codification
     class << self
       # Create a hash, based on vectors, to create the dictionary.
@@ -38,7 +38,7 @@ module Statsample
           raise Exception, "Vector #{v_name} doesn't exists on Dataset" if !dataset.fields.include? v_name
           v=dataset[v_name]
           split_data=v.splitted(sep).flatten.collect {|c| c.to_s}.find_all {|c| !c.nil?}
           factors=split_data.uniq.compact.sort.inject({}) {|ac,val| ac[val]=val;ac }
           h[v_name]=factors
           h
@@ -48,7 +48,7 @@ module Statsample
       # Create a yaml to create a dictionary, based on vectors
       # The keys will be vectors name on dataset and the values
       # will be hashes, with keys = values, for recodification
-      #
+      #
       #   v1=%w{a,b b,c d}.to_vector
       #   ds={"v1"=>v1}.to_dataset
       #   Statsample::Codification.create_yaml(ds,['v1'])
@@ -63,7 +63,7 @@ module Statsample
       # * field: name of vector
       # * original: original name
       # * recoded: new code
       def create_excel(dataset, vectors, filename, sep=Statsample::SPLIT_TOKEN)
         require 'spreadsheet'
         if File.exists?(filename)
@@ -98,7 +98,7 @@ module Statsample
         end
         h
       end
       def inverse_hash(h, sep=Statsample::SPLIT_TOKEN)
         h.inject({}) do |a,v|
           v[1].split(sep).each do |val|
@@ -108,11 +108,11 @@ module Statsample
           a
         end
       end
       def dictionary(h, sep=Statsample::SPLIT_TOKEN)
         h.inject({}) {|a,v| a[v[0]]=v[1].split(sep); a }
       end
       def recode_vector(v,h,sep=Statsample::SPLIT_TOKEN)
         dict=dictionary(h,sep)
         new_data=v.splitted(sep)
@@ -125,45 +125,45 @@ module Statsample
         end
       end
       def recode_dataset_simple!(dataset, dictionary_hash ,sep=Statsample::SPLIT_TOKEN)
-            _recode_dataset(dataset,dictionary_hash ,sep,false)
-        end
-        def recode_dataset_split!(dataset, dictionary_hash, sep=Statsample::SPLIT_TOKEN)
-            _recode_dataset(dataset, dictionary_hash, sep,true)
-        end
-        def _recode_dataset(dataset, h , sep=Statsample::SPLIT_TOKEN, split=false)
-          v_names||=h.keys
-          v_names.each do |v_name|
-            raise Exception, "Vector #{v_name} doesn't exists on Dataset" if !dataset.fields.include? v_name
-            recoded=recode_vector(dataset[v_name], h[v_name],sep).collect { |c|
-              if c.nil?
-                  nil
-              else
-                  c.join(sep)
-              end
-            }.to_vector
-            if(split)
+        _recode_dataset(dataset,dictionary_hash ,sep,false)
+      end
+      def recode_dataset_split!(dataset, dictionary_hash, sep=Statsample::SPLIT_TOKEN)
+        _recode_dataset(dataset, dictionary_hash, sep,true)
+      end
+      def _recode_dataset(dataset, h , sep=Statsample::SPLIT_TOKEN, split=false)
+        v_names||=h.keys
+        v_names.each do |v_name|
+          raise Exception, "Vector #{v_name} doesn't exists on Dataset" if !dataset.fields.include? v_name
+          recoded=recode_vector(dataset[v_name], h[v_name],sep).collect { |c|
+            if c.nil?
+              nil
+            else
+              c.join(sep)
+            end
+          }.to_vector
+          if(split)
             recoded.split_by_separator(sep).each {|k,v|
               dataset[v_name+"_"+k]=v
             }
-            else
-              dataset[v_name+"_recoded"]=recoded
-            end
+          else
+            dataset[v_name+"_recoded"]=recoded
           end
         end
-        def verify(h, v_names=nil,sep=Statsample::SPLIT_TOKEN,io=$>)
-          require 'pp'
-          v_names||=h.keys
-          v_names.each{|v_name|
-            inverse=inverse_hash(h[v_name],sep)
-            io.puts "- Field: #{v_name}"
-            inverse.sort{|a,b| -(a[1].count<=>b[1].count)}.each {|k,v|
-              io.puts "  - \"#{k}\" (#{v.count}) :\n    -'"+v.join("\n    -'")+"'"
-            }
+      end
+      def verify(h, v_names=nil,sep=Statsample::SPLIT_TOKEN,io=$>)
+        require 'pp'
+        v_names||=h.keys
+        v_names.each{|v_name|
+          inverse=inverse_hash(h[v_name],sep)
+          io.puts "- Field: #{v_name}"
+          inverse.sort{|a,b| -(a[1].count<=>b[1].count)}.each {|k,v|
+            io.puts "  - \"#{k}\" (#{v.count}) :\n    -'"+v.join("\n    -'")+"'"
           }
-        end
+        }
+      end
     end
   end
 end

data/lib/statsample/combination.rb CHANGED Viewed

@@ -1,8 +1,7 @@
 module Statsample
   # Combination class systematically generates all combinations of n elements, taken r at a time.
   # With rbgsl, GSL::Combination is available for extra speed
-  # Source: http://snippets.dzone.com/posts/show/4666
-  # Use:
+  # == Use:
   #  comb=Statsample::Combination.new(3,5)
   #  => #<Statsample::Combination:0x7f6323804e08 @n=5, @d=#<Statsample::Combination::CombinationGsl:0x7f63237ff7f0 @n=5, @k=3, @c=GSL::Combination>, @k=3>
   #  comb.each{|c| p c }
@@ -16,23 +15,25 @@ module Statsample
   #  [1, 2, 4]
   #  [1, 3, 4]
   #  [2, 3, 4]
+  # == Reference:
+  # * http://snippets.dzone.com/posts/show/4666
   #
   class Combination
     attr_reader :d
     def initialize(k,n,only_ruby=false)
-        @k=k
-        @n=n
-        if HAS_GSL and !only_ruby
-            @d=CombinationGsl.new(@k,@n)
-        else
-            @d=CombinationRuby.new(@k,@n)
-        end
+      @k=k
+      @n=n
+      if Statsample.has_gsl? and !only_ruby
+        @d=CombinationGsl.new(@k,@n)
+      else
+        @d=CombinationRuby.new(@k,@n)
+      end
     end
     def each
-        reset
-        while a=next_value
-            yield a
-        end
+      reset
+      while a=next_value
+        yield a
+      end
     end
     def reset
         @d.reset
@@ -43,70 +44,70 @@ module Statsample
     class CombinationRuby # :nodoc:
       attr_reader :data
       def initialize(k,n)
-          raise "k<=n" if k>n
-          @k=k
-          @n=n
-          reset
+        raise "k<=n" if k>n
+        @k=k
+        @n=n
+        reset
       end
       def reset
-          @data=[]
-          (0...@k).each {|i| @data[i] = i }
+        @data=[]
+        (0...@k).each {|i| @data[i] = i }
       end
       def each
-          reset
-          while a=next_value
-              yield a
-          end
+        reset
+        while a=next_value
+            yield a
+        end
       end
       def next_value
-          return false if !@data
-          old_comb=@data.dup
-          i = @k - 1;
-          @data[i]+=1
-          while ((i >= 0) and (@data[i] >= @n - @k + 1 + i)) do
-              i-=1;
-              @data[i]+=1;
-          end
-          if (@data[0] > @n - @k) # Combination (n-k, n-k+1, ..., n) reached */
-              @data=false # No more combinations can be generated
-          else
-              # comb now looks like (..., x, n, n, n, ..., n).
-              # Turn it into (..., x, x + 1, x + 2, ...)
-              i = i+1
-              (i...@k).each{ |i1|
-                  @data[i1] = @data[i1 - 1] + 1
-              }
-          end
-          return old_comb
+        return false if !@data
+        old_comb=@data.dup
+        i = @k - 1;
+        @data[i]+=1
+        while ((i >= 0) and (@data[i] >= @n - @k + 1 + i)) do
+          i-=1;
+          @data[i]+=1;
+        end
+        if (@data[0] > @n - @k) # Combination (n-k, n-k+1, ..., n) reached */
+          @data=false # No more combinations can be generated
+        else
+          # comb now looks like (..., x, n, n, n, ..., n).
+          # Turn it into (..., x, x + 1, x + 2, ...)
+          i = i+1
+          (i...@k).each{ |i1|
+              @data[i1] = @data[i1 - 1] + 1
+          }
+        end
+        return old_comb
       end
     end
     # rb-gsl engine for Combinations
     class CombinationGsl # :nodoc:
       def initialize(k,n)
-          require 'gsl'
-          raise "k<=n" if k>n
-          @k=k
-          @n=n
-          reset
+        require 'gsl'
+        raise "k<=n" if k>n
+        @k=k
+        @n=n
+        reset
       end
       def reset
-          @c= ::GSL::Combination.calloc(@n, @k);
+        @c= ::GSL::Combination.calloc(@n, @k);
       end
       def next_value
-          return false if !@c
-          data=@c.data.to_a
-          if @c.next != GSL::SUCCESS
-              @c=false
-          end
-          return data
+        return false if !@c
+        data=@c.data.to_a
+        if @c.next != GSL::SUCCESS
+          @c=false
+        end
+        return data
       end
       def each
-          reset
-          begin
-            yield @c.data.to_a
-          end while @c.next == GSL::SUCCESS
+        reset
+        begin
+        yield @c.data.to_a
+        end while @c.next == GSL::SUCCESS
       end
     end
   end

data/lib/statsample/converter/csv19.rb CHANGED Viewed

@@ -1,10 +1,10 @@
 module Statsample
   class CSV < SpreadsheetBase
-	  class << self
-        # Returns a Dataset  based on a csv file
-        #
-        # USE:
-        #     ds=Statsample::CSV.read("test_csv.csv")
+    class << self
+      # Returns a Dataset  based on a csv file
+      #
+      # USE:
+      #     ds=Statsample::CSV.read("test_csv.csv")
       def read(filename, empty=[''],ignore_lines=0,fs=nil,rs=nil)
         require 'csv'
         first_row=true
@@ -36,17 +36,17 @@ module Statsample
         ds.update_valid_data
         ds
       end
-        # Save a Dataset on a csv file
-        #
-        # USE:
-        #     Statsample::CSV.write(ds,"test_csv.csv")
+      # Save a Dataset on a csv file
+      #
+      # USE:
+      #     Statsample::CSV.write(ds,"test_csv.csv")
       def write(dataset,filename, convert_comma=false,*opts)
-        require 'csv'
+        require 'csv'
         writer=::CSV.open(filename,'w',*opts)
         writer << dataset.fields
         dataset.each_array do|row|
           if(convert_comma)
-              row.collect!{|v| v.to_s.gsub(".",",")}
+            row.collect!{|v| v.to_s.gsub(".",",")}
           end
           writer << row
         end
@@ -54,4 +54,4 @@ module Statsample
       end
     end
   end
-end
+end

data/lib/statsample/converters.rb CHANGED Viewed

@@ -175,7 +175,7 @@ raise "Should'nt be empty headers: [#{row.to_a.join(",")}]" if row.to_a.find_all
           end
         }
       end
-      private :process_row
+      private :process_row, :preprocess_row
       # Returns a dataset based on a xls file
       # USE: