RubyGems - statsample - Versions diffs - 1.4.0 → 1.4.1 - Mend

statsample 1.4.0 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

checksums.yaml +4 -4
data/.gitignore +14 -0
data/Gemfile +1 -16
data/History.txt +51 -46
data/LICENSE.txt +7 -82
data/README.md +145 -150
data/Rakefile +20 -102
data/lib/spss.rb +17 -14
data/lib/statsample/crosstab.rb +2 -2
data/lib/statsample/dataset.rb +82 -81
data/lib/statsample/matrix.rb +43 -43
data/lib/statsample/reliability.rb +1 -2
data/lib/statsample/vector.rb +157 -124
data/lib/statsample/version.rb +1 -1
data/lib/statsample.rb +91 -91
data/references.txt +2 -1
data/statsample.gemspec +89 -0
data/test/test_awesome_print_bug.rb +16 -0
data/test/test_crosstab.rb +8 -0
data/test/test_histogram.rb +7 -0
data/test/test_vector.rb +62 -48
metadata +109 -120
data/.gemtest +0 -0
data/Gemfile.lock +0 -78
data/Manifest.txt +0 -157
data/setup.rb +0 -1585

data/lib/statsample/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Statsample
-  VERSION = '1.4.0'
+  VERSION = '1.4.1'
 end

data/lib/statsample.rb CHANGED Viewed

@@ -1,4 +1,4 @@
-# = statsample.rb -
+# = statsample.rb -
 # Statsample - Statistic package for Ruby
 # Copyright (C) 2008-2014  Claudio Bustos
 #
@@ -17,17 +17,16 @@
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 #
-#$:.unshift(File.dirname(__FILE__))
 require 'matrix'
 require 'extendmatrix'
 require 'distribution'
 require 'dirty-memoize'
 require 'reportbuilder'
 class Numeric
-  def square ; self * self ; end
+  def square
+    self * self
+  end
 end
 class String
@@ -41,10 +40,10 @@ class String
 end
 class Module
-  def include_aliasing(m, suffix="ruby")
+  def include_aliasing(m, suffix = 'ruby')
     m.instance_methods.each do |f|
       if instance_methods.include? f
-        alias_method("#{f}_#{suffix}",f)
+        alias_method("#{f}_#{suffix}", f)
         remove_method f
       end
     end
@@ -60,15 +59,26 @@ class Array
   #   a.recode_repeated
   #   => ["a","b","c_1","c_2","d_1","d_2","d_3","e"]
   def recode_repeated
-    if self.size!=self.uniq.size
+    if size != uniq.size
       # Find repeated
-      repeated=self.inject({}) {|a,v|
-      (a[v].nil? ? a[v]=1 : a[v]+=1); a }.find_all{|k,v| v>1}.collect{|k,v| k}
-      ns=repeated.inject({}) {|a,v| a[v]=0;a}
-      self.collect do |f|
+      repeated = inject({}) do |acc, v|
+        if acc[v].nil?
+          acc[v] = 1
+        else
+          acc[v] += 1
+        end
+        acc
+      end.select { |_k, v| v > 1 }.keys
+      ns = repeated.inject({}) do |acc, v|
+        acc[v] = 0
+        acc
+      end
+      collect do |f|
         if repeated.include? f
-          ns[f]+=1
-          sprintf("%s_%d",f,ns[f])
+          ns[f] += 1
+          sprintf('%s_%d', f, ns[f])
         else
           f
         end
@@ -79,61 +89,62 @@ class Array
   end
 end
-def create_test(*args,&proc)
-  description=args.shift
-  fields=args
+def create_test(*args, &_proc)
+  description = args.shift
+  fields = args
   [description, fields, Proc.new]
 end
 #--
 # Test extensions
 begin
   require 'gettext'
 rescue LoadError
   def bindtextdomain(d) #:nodoc:
-  d
+    d
   end
   # Bored module
   module GetText  #:nodoc:
-    def _(t)
-        t
+    def _(t)
+      t
     end
   end
 end
 # Library for statistical analysis on Ruby
 #
 # * Classes for manipulation and storage of data:
 # * Module Statsample::Bivariate provides covariance and pearson, spearman, point biserial, tau a, tau b, gamma, tetrachoric (see Bivariate::Tetrachoric) and polychoric (see Bivariate::Polychoric) correlations. It also includes methods to create correlation and covariance matrices.
 # * Multiple types of regression on Statsample::Regression
 # * Factorial Analysis algorithms on Statsample::Factor module.
-# * Dominance Analysis. Based on Budescu and Azen papers.link[http://psycnet.apa.org/journals/met/8/2/129/].
+# * Dominance Analysis. Based on Budescu and Azen papers.link[http://psycnet.apa.org/journals/met/8/2/129/].
 # * Module Statsample::Codification, to help to codify open questions
 # * Converters to import and export data from databases, csv and excel files.
 # * Module Statsample::Crosstab provides function to create crosstab for categorical data
 # * Reliability analysis provides functions to analyze scales.
 # * Module Statsample::SRS (Simple Random Sampling) provides a lot of functions to estimate standard error for several types of samples
-# * Interfaces to gdchart, gnuplot and SVG::Graph
+# * Interfaces to gdchart, gnuplot and SVG::Graph
 #
 module Statsample
   def self.create_has_library(library)
     define_singleton_method("has_#{library}?") do
-      cv="@@#{library}"
-      if !class_variable_defined? cv
-        begin
+      cv = "@@#{library}"
+      unless class_variable_defined? cv
+        begin
           require library.to_s
-          class_variable_set(cv,true)
+          class_variable_set(cv, true)
         rescue LoadError
-          class_variable_set(cv,false)
+          class_variable_set(cv, false)
         end
       end
       class_variable_get(cv)
     end
   end
   create_has_library :gsl
-  SPLIT_TOKEN = ","
+  SPLIT_TOKEN = ','
   autoload(:Analysis, 'statsample/analysis')
   autoload(:Database, 'statsample/converters')
   autoload(:Anova, 'statsample/anova')
@@ -154,133 +165,122 @@ module Statsample
   autoload(:Multivariate, 'statsample/multivariate')
   autoload(:Multiset, 'statsample/multiset')
   autoload(:StratifiedSample, 'statsample/multiset')
-  autoload(:MLE, 'statsample/mle')
+  autoload(:MLE, 'statsample/mle')
   autoload(:Regression, 'statsample/regression')
   autoload(:Test, 'statsample/test')
   autoload(:Factor, 'statsample/factor')
   autoload(:Graph, 'statsample/graph')
   class << self
     # Load an object saved in a file.
     def load(filename)
       if File.exist? filename
-        o=false
-        File.open(filename,"r") {|fp| o=Marshal.load(fp) }
+        o = false
+        File.open(filename, 'r') { |fp| o = Marshal.load(fp) }
         o
       else
         false
       end
     end
     # Create a matrix using vectors as columns.
     # Use:
     #
     #   matrix=Statsample.vector_cols_matrix(v1,v2)
     def vector_cols_matrix(*vs)
       # test
-      size=vs[0].size
-      vs.each{|v|
-        raise ArgumentError,"Arguments should be Vector" unless v.instance_of? Statsample::Vector
-        raise ArgumentError,"Vectors size should be the same" if v.size!=size
-      }
-      Matrix.rows((0...size).to_a.collect() {|i|
-        vs.collect{|v| v[i]}
-      })
+      size = vs[0].size
+      vs.each do |v|
+        fail ArgumentError, 'Arguments should be Vector' unless v.instance_of? Statsample::Vector
+        fail ArgumentError, 'Vectors size should be the same' if v.size != size
+      end
+      Matrix.rows((0...size).to_a.collect { |i| vs.collect { |v| v[i] } })
     end
     # Returns a duplicate of the input vectors, without missing data
     # for any of the vectors.
-    #
+    #
     #  a=[1,2,3,6,7,nil,3,5].to_scale
     #  b=[nil,nil,5,6,4,5,10,2].to_scale
     #  c=[2,4,6,7,4,5,6,7].to_scale
     #  a2,b2,c2=Statsample.only_valid(a,b,c)
-    #  => [#<Statsample::Scale:0xb748c8c8 @data=[3, 6, 7, 3, 5]>,
-    #        #<Statsample::Scale:0xb748c814 @data=[5, 6, 4, 10, 2]>,
+    #  => [#<Statsample::Scale:0xb748c8c8 @data=[3, 6, 7, 3, 5]>,
+    #        #<Statsample::Scale:0xb748c814 @data=[5, 6, 4, 10, 2]>,
     #        #<Statsample::Scale:0xb748c760 @data=[6, 7, 4, 6, 7]>]
     #
     def only_valid(*vs)
-      i=1
-      h=vs.inject({}) {|a,v| a["v#{i}"]=v;i+=1;a}
-      ds=Statsample::Dataset.new(h).dup_only_valid
+      i = 1
+      h = vs.inject({}) { |acc, v| acc["v#{i}"] = v; i += 1; acc }
+      ds = Statsample::Dataset.new(h).dup_only_valid
       ds.vectors.values
     end
-    # Cheap version of #only_valid.
+    # Cheap version of #only_valid.
     # If any vectors have missing_values, return only valid.
     # If not, return the vectors themselves
     def only_valid_clone(*vs)
-      if vs.any? {|v| v.flawed?}
+      if vs.any?(&:flawed?)
         only_valid(*vs)
       else
         vs
       end
     end
-  end
+  end
   module Util
     # Reference: http://www.itl.nist.gov/div898/handbook/eda/section3/normprpl.htm
-    def normal_order_statistic_medians(i,n)
-      if i==1
-        u= 1.0 - normal_order_statistic_medians(n,n)
-      elsif i==n
-        u=0.5**(1 / n.to_f)
+    def normal_order_statistic_medians(i, n)
+      if i == 1
+        u = 1.0 - normal_order_statistic_medians(n, n)
+      elsif i == n
+        u = 0.5**(1 / n.to_f)
       else
-        u= (i - 0.3175) / (n + 0.365)
+        u = (i - 0.3175) / (n + 0.365)
       end
       u
     end
-    def self.nice(s,e) # :nodoc:
-      reverse = e<s
+    def self.nice(s, e) # :nodoc:
+      reverse = e < s
       min = reverse ? e : s
       max = reverse ? s : e
-      span=max-min
-      return [s, e] if (!span or (span.respond_to? :infinite? and span.infinite?))
-      step=10**((Math::log(span).quo(Math::log(10))).round - 1).to_f
-      out=[(min.quo(step)).floor * step, (max.quo(step)).ceil * step]
+      span = max - min
+      return [s, e] if span == 0 || (span.respond_to?(:infinite?) && span.infinite?)
+      step = 10**((Math.log(span).quo(Math.log(10))).round - 1).to_f
+      out = [(min.quo(step)).floor * step, (max.quo(step)).ceil * step]
       out.reverse! if reverse
       out
     end
   end
   module Writable
     def save(filename)
-      fp=File.open(filename,"w")
-      Marshal.dump(self,fp)
+      fp = File.open(filename, 'w')
+      Marshal.dump(self, fp)
       fp.close
-    end
+    end
   end
   # Provides method summary to generate summaries and include GetText
   module Summarizable
     include GetText
-    bindtextdomain("statsample")
-    def summary(method=:to_text)
-      ReportBuilder.new(:no_title=>true).add(self).send(method)
+    bindtextdomain('statsample')
+    def summary(method = :to_text)
+      ReportBuilder.new(no_title: true).add(self).send(method)
     end
   end
   module STATSAMPLE__ #:nodoc:
   end
 end
 #--
-begin
+begin
   require 'statsamplert'
 rescue LoadError
   module Statsample
-    OPTIMIZED=false
+    OPTIMIZED = false
   end
 end

data/references.txt CHANGED Viewed

@@ -8,11 +8,13 @@ References
 * Dziuban, C., & Shirkey E. (1974). When is a correlation matrix appropriate for factor analysis? Some decision rules. Psychological Bulletin, 81(6), 358-361.
 * Hayton, J., Allen, D. & Scarpello, V.(2004). Factor Retention Decisions in Exploratory Factor Analysis: a Tutorial on Parallel Analysis. <i>Organizational Research Methods, 7</i> (2), 191-205.
 * Härdle, W. & Simar, L. (2003). Applied Multivariate Statistical Analysis. Springer
+* Leach, L. & Henson, R. (2007). The Use and Impact of Adjusted R2 Effects in Published Regression Research. Multiple Linear Regression Viewpoints, 33(1), 1-11.
 * Lin, J. (2007). VARIMAX_K58 [Source code]. [http://www.johnny-lin.com/idl_code/varimax_k58.pro]
 * Liu, O., & Rijmen, F. (2008). A modified procedure for parallel analysis of ordered categorical data. Behavior Research Methods, 40(2), 556-562.
 * McGraw, K. & Wong, S.P. (1996). Forming Inferences About Some Intraclass Correlation Coefficients. Psychological methods, 1(1), 30-46.
 * O'Connor, B. (2000). SPSS and SAS programs for determining the number of components using parallel analysis and Velicer's MAP test. Behavior Research Methods, Instruments, & Computers, 32(3), 396-402.
 * SPSS Manual
+* Sawyer, S. (2005). Resampling Data: Using a Statistical Jackknife.
 * Shrout,P. & Fleiss, J. (1979). Intraclass Correlation: Uses in assessing rater reliability. Psychological Bulletin, 86(2), 420-428
 * Smith, L. (2002). A tutorial on Principal Component Analysis. Available on http://courses.eas.ualberta.ca/eas570/pca_tutorial.pdf
 * http://en.wikipedia.org/wiki/Welch-Satterthwaite_equation
@@ -20,4 +22,3 @@ References
 * http://stattrek.com/Lesson6/SRS.aspx
 * http://talkstats.com/showthread.php?t=5056
 * http://www.gnu.org/software/gsl/manual/html_node/The-histogram-struct.html
-* http://www.taygeta.com/random/gaussian.html

data/statsample.gemspec ADDED Viewed

@@ -0,0 +1,89 @@
+$:.unshift File.expand_path("../lib/", __FILE__)
+require 'statsample/version'
+require 'date'
+DESCRIPTION = <<MSG
+A suite for basic and advanced statistics on Ruby. Tested on CRuby 1.9.3, 2.0.0
+and 2.1.1. See `.travis.yml` for more information.
+Include:
+- Descriptive statistics: frequencies, median, mean,
+standard error, skew, kurtosis (and many others).
+- Imports and exports datasets from and to Excel, CSV and plain text files.
+- Correlations: Pearson's r, Spearman's rank correlation (rho), point biserial,
+tau a, tau b and  gamma. Tetrachoric and Polychoric correlation provides by
+statsample-bivariate-extension gem.
+- Intra-class correlation
+- Anova: generic and vector-based One-way ANOVA and Two-way ANOVA, with contrasts for
+One-way ANOVA.
+- Tests: F, T, Levene, U-Mannwhitney.
+- Regression: Simple, Multiple (OLS), Probit and Logit
+- Factorial Analysis: Extraction (PCA and Principal Axis), Rotation (Varimax,
+Equimax, Quartimax) and Parallel Analysis and Velicer's MAP test, for
+estimation of number of factors.
+- Reliability analysis for simple scale and a DSL to easily analyze multiple
+scales using factor analysis and correlations, if you want it.
+- Dominance Analysis, with multivariate dependent and bootstrap (Azen & Budescu)
+- Sample calculation related formulas
+- Structural Equation Modeling (SEM), using R libraries +sem+ and +OpenMx+
+- Creates reports on text, html and rtf, using ReportBuilder gem
+- Graphics: Histogram, Boxplot and Scatterplot.
+MSG
+POSTINSTALL = <<MSG
+***************************************************
+Thanks for installing statsample.
+On *nix, you could install statsample-optimization
+to retrieve gems gsl, statistics2 and a C extension
+to speed some methods.
+$ [sudo] gem install statsample-optimization
+*****************************************************
+MSG
+Gem::Specification.new do |s|
+  s.name = "statsample"
+  s.version = Statsample::VERSION
+  s.date = Date.today.to_s
+  s.homepage = "https://github.com/sciruby/statsample"
+  s.authors = ["Claudio Bustos", "Carlos Agarie"]
+  s.email = ["clbustos@gmail.com", "carlos@onox.com.br"]
+  s.summary = "A suite for basic and advanced statistics on Ruby"
+  s.description = DESCRIPTION
+  s.post_install_message = POSTINSTALL
+  s.rdoc_options = ["--main", "README.md"]
+  s.extra_rdoc_files = ["History.txt", "LICENSE.txt", "README.md", "references.txt"]
+  s.require_paths = ["lib"]
+  s.files = `git ls-files`.split("\n")
+  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
+  s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
+  s.add_runtime_dependency 'spreadsheet', '~> 0.6.5'
+  s.add_runtime_dependency 'reportbuilder', '~> 1.4'
+  s.add_runtime_dependency 'minimization'
+  s.add_runtime_dependency 'dirty-memoize'
+  s.add_runtime_dependency 'extendmatrix'
+  s.add_runtime_dependency 'rserve-client'
+  s.add_runtime_dependency 'rubyvis', '~> 0.5.0'
+  s.add_runtime_dependency 'distribution'
+  s.add_runtime_dependency 'rb-gsl'
+  s.add_runtime_dependency 'awesome_print'
+  s.add_development_dependency 'bundler'
+  s.add_development_dependency 'rake'
+  s.add_development_dependency 'rdoc'
+  s.add_development_dependency 'shoulda'
+  s.add_development_dependency 'shoulda-matchers', '~> 2.2'
+  s.add_development_dependency 'minitest'
+  s.add_development_dependency 'gettext'
+  s.add_development_dependency 'mocha'
+end

data/test/test_awesome_print_bug.rb ADDED Viewed

@@ -0,0 +1,16 @@
+require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
+class StatsampleAwesomePrintBug < MiniTest::Test
+  context("Awesome Print integration") do
+    setup do
+      require "awesome_print"
+    end
+    should "should be flawless" do
+      a=[1,2,3].to_scale
+      assert(a!=[1,2,3])
+      assert_nothing_raised do
+        ap a
+      end
+    end
+  end
+end

data/test/test_crosstab.rb CHANGED Viewed

@@ -52,4 +52,12 @@ class StatsampleCrosstabTestCase < MiniTest::Unit::TestCase
     ct=Statsample::Crosstab.new(v1,v2)
     assert_equal(Matrix[[2.5,2.5],[2.5,2.5]],ct.matrix_expected)
   end
+  def test_crosstab_with_scale
+    v1=%w{1 1 1 1 1 0 0 0 0 0}.to_scale
+    v2=%w{0 0 0 0 0 1 1 1 1 1}.to_scale
+    ct=Statsample::Crosstab.new(v1,v2)
+    assert_equal(Matrix[[0,5],[5,0]],ct.to_matrix)
+    assert_nothing_raised { ct.summary }
+  end
 end

data/test/test_histogram.rb CHANGED Viewed

@@ -100,6 +100,13 @@ class StatsampleHistogramTestCase < MiniTest::Unit::TestCase
       assert_equal(2,h.sum(1,4))
     end
+    should "not raise exception when all values equal" do
+      assert_nothing_raised do
+        a = [5,5,5,5,5,5].to_scale
+        h=Statsample::Graph::Histogram.new(a)
+        h.to_svg
+      end
+    end
   end
 end