RubyGems - statsample-glm - Versions diffs - 0.0.1 → 0.1.0 - Mend

statsample-glm 0.0.1 → 0.1.0

Files changed (43) hide show

checksums.yaml +7 -0
data/.gitignore +51 -0
data/.rspec +1 -0
data/.travis.yml +2 -9
data/Gemfile +2 -20
data/LICENSE.txt +1 -1
data/README.rdoc +14 -11
data/Rakefile +16 -24
data/lib/statsample-glm.rb +1 -11
data/lib/statsample-glm/{regression.rb → glm.rb} +13 -46
data/lib/statsample-glm/glm/base.rb +99 -0
data/lib/statsample-glm/glm/irls/base.rb +54 -0
data/lib/statsample-glm/glm/irls/logistic.rb +46 -0
data/lib/statsample-glm/glm/irls/poisson.rb +48 -0
data/lib/statsample-glm/glm/logistic.rb +16 -0
data/lib/statsample-glm/glm/mle/base.rb +157 -0
data/lib/statsample-glm/glm/mle/logistic.rb +113 -0
data/lib/statsample-glm/glm/mle/normal.rb +94 -0
data/lib/statsample-glm/glm/mle/probit.rb +100 -0
data/lib/statsample-glm/glm/normal.rb +17 -0
data/lib/statsample-glm/glm/poisson.rb +17 -0
data/lib/statsample-glm/glm/probit.rb +16 -0
data/lib/statsample-glm/version.rb +5 -0
data/spec/data/logistic.csv +51 -0
data/spec/data/logistic_mle.csv +201 -0
data/spec/data/normal.csv +30 -0
data/spec/logistic_spec.rb +37 -0
data/spec/normal_spec.rb +15 -0
data/spec/poisson_spec.rb +32 -0
data/spec/probit_spec.rb +19 -0
data/spec/spec_helper.rb +50 -0
data/statsample-glm.gemspec +35 -0
metadata +71 -145
data/VERSION +0 -1
data/features/bio-statsample-glm.feature +0 -9
data/features/step_definitions/bio-statsample-glm_steps.rb +0 -0
data/features/support/env.rb +0 -15
data/lib/statsample-glm/regression/logistic.rb +0 -108
data/lib/statsample-glm/regression/poisson.rb +0 -90
data/test/helper.rb +0 -87
data/test/test_glm.rb +0 -4
data/test/test_glm_logistic.rb +0 -23
data/test/test_glm_poisson.rb +0 -25

checksums.yaml ADDED

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: a514ad693d8ef698d63b3c51c903ead6837bf498
+  data.tar.gz: 9a06cc01573d053da123a6f91f50dce703141244
+SHA512:
+  metadata.gz: 4330fe635519e054cd8f01e0dddde160cc1f61c21efbc2cedbae8f509c855e4b145982b99b8867fda6d6ecf03f9eea395de062ed0bf675a8309e4c921c26daf8
+  data.tar.gz: 979f0af03a502f92b1b09d1e4561ff3d8f77e45a832f1756136591db5ac1adcf807cd513a634f838339ff252214dc8356bf3f4a34088cc0c33fdf5dade192c35

data/.gitignore ADDED

@@ -0,0 +1,51 @@
+# rcov generated
+coverage
+coverage.data
+# rdoc generated
+rdoc
+# yard generated
+doc
+.yardoc
+# bundler
+.bundle
+# jeweler generated
+pkg
+# Have editor/IDE/OS specific files you need to ignore? Consider using a global gitignore:
+#
+# * Create a file at ~/.gitignore
+# * Include files you want ignored
+# * Run: git config --global core.excludesfile ~/.gitignore
+#
+# After doing this, these files will be ignored in all your git projects,
+# saving you from having to 'pollute' every project you touch with them
+#
+# Not sure what needs to be ignored for particular editors/OSes? Here are some ideas to get you started. (Remember to remove the leading # of the line)
+#
+# For MacOS:
+#
+#.DS_Store
+# For TextMate
+#*.tmproj
+#tmtags
+# For emacs:
+#*~
+#\#*
+#.\#*
+# For vim:
+#*.swp
+# For redcar:
+#.redcar
+# For rubinius:
+#*.rbc
+# Ignore Gemfile.lock for gems. See http://yehudakatz.com/2010/12/16/clarifying-the-roles-of-the-gemspec-and-gemfile/
+Gemfile.lock

data/.rspec ADDED

@@ -0,0 +1 @@
+--color

data/.travis.yml CHANGED

@@ -1,13 +1,6 @@
 language: ruby
+cache: bundler
 rvm:
-  - 1.9.2
   - 1.9.3
   - 2.0.0
-  - jruby-19mode # JRuby in 1.9 mode
-  - rbx-19mode
-#  - 1.8.7
-#  - jruby-18mode # JRuby in 1.8 mode
-#  - rbx-18mode
-# uncomment this line if your project needs to run something other than `rake`:
-# script: bundle exec rspec spec
+  - 2.1.1

data/Gemfile CHANGED

@@ -1,20 +1,2 @@
-source "http://rubygems.org"
-gem 'statsample', '>=1.2.0'
-# Add dependencies required to use your gem here.
-# Example:
-   gem "activesupport", "= 3.2.10"
-# Add dependencies to develop your gem here.
-# Include everything needed to run rake, tests, features, etc.
-group :development do
-  gem "shoulda", ">= 0"
-  gem "rdoc", "~> 3.12"
-  gem "minitest", "~> 4.7.5"
-  gem "cucumber", ">= 0"
-  gem "bundler", "~> 1.3.5"
-  gem "jeweler", "~> 1.8.4"
-  gem "bio", ">= 1.4.2"
-  gem "rdoc", "~> 3.12"
-  gem "mocha", "~> 0.14.0"
-end
+source "https://rubygems.org"
+gemspec

data/LICENSE.txt CHANGED

@@ -9,7 +9,7 @@ You *must* read the Contributor Agreement before contributing code to the SciRub
 -----
-Copyright (c) 2010 - 2013, Ruby Science Foundation
+Copyright (c) 2013, Ankur Goel and the Ruby Science Foundation
 All rights reserved.
 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

data/README.rdoc CHANGED

@@ -6,6 +6,8 @@ src="https://secure.travis-ci.org/AnkurGel/statsample-glm.png"
 Statsample-GLM is an extension of *Generalized Linear Models* to {Statsample}[https://github.com/SciRuby/statsample], a suite of advanced statistics in Ruby.
+Requires ruby 1.9.3 or higher.
 * {sciruby.com}[http://sciruby.com]
 * {Google+}[https://plus.google.com/109304769076178160953/posts]
 * {Ankur Goel}[http://ankurgoel.com]
@@ -13,14 +15,17 @@ Statsample-GLM is an extension of *Generalized Linear Models* to {Statsample}[ht
 == Description
-Statsample-GLM is an extension of Statsample, and includes many helpful regression techniques for Generalized Linear models such as:
+Statsample-glm includes the following Generalized Linear Models:
-* Poisson Regression
-* Logistic Regression
-* Exponential Regression
 * Iteratively Reweighted Least Squares
+  * Poisson Regression
+  * Logistic Regression
+* Maximum Likelihood Models (Newton Raphson)
+  * Logistic Regression
+  * Probit Regression
+  * Normal Regression
-Statsample-GLM was created by Ankur Goel as part of Google's Summer of Code 2013. It is the part of {SciRuby}[http://sciruby.com]
+Statsample-GLM was created by Ankur Goel as part of Google's Summer of Code 2013. It is part of {the SciRuby Project}[http://sciruby.com].
 Note: This is under active development!
@@ -40,7 +45,7 @@ You can also go through the blog-posts on {my blog}[http://ankurgoel.com] for de
 == Documentation
-The API doc is {online}[http://rubygems.org/gems/statsample-glm]. For more code examples see also the test files in the source tree.
+The API doc is {online}[http://rubygems.org/gems/statsample-glm]. For more code examples see also the spec files in the source tree.
 == Contributing
@@ -49,7 +54,7 @@ The API doc is {online}[http://rubygems.org/gems/statsample-glm]. For more code
 * Create your feature branch
 * Add/Modify code.
 * Write equivalent documentation and **tests**.
-* Run `rake test` to verify that all test case passes.
+* Run `rspec` to verify that all test cases pass.
 * Push your branch.
 * Pull request. :)
@@ -60,12 +65,10 @@ Information on the source tree, documentation, issues and how to contribute, see
   http://github.com/ankurgel/statsample-glm
-== Biogems.info
 This Biogem is published at http://biogems.info/index.html#statsample-glm
 == Copyright
-Copyright (c) 2013 Ankur Goel. See LICENSE.txt for further details.
+Copyright (c) 2013 Ankur Goel and the Ruby Science Foundation. See LICENSE.txt for further details.
+Statsample is (c) 2009-2013 Claudio Bustos and the Ruby Science Foundation.

data/Rakefile CHANGED

@@ -1,6 +1,7 @@
 # encoding: utf-8
+require 'rake'
+require 'bundler/gem_tasks'
-require 'rubygems'
 require 'bundler'
 begin
   Bundler.setup(:default, :development)
@@ -9,37 +10,28 @@ rescue Bundler::BundlerError => e
   $stderr.puts "Run `bundle install` to install missing gems"
   exit e.status_code
 end
-require 'rake'
-require 'jeweler'
-Jeweler::Tasks.new do |gem|
-  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
-  gem.name = "statsample-glm"
-  gem.homepage = "http://github.com/AnkurGel/statsample-glm"
-  gem.license = "MIT"
-  gem.summary = %Q{Generalized Linear Models for Statsample}
-  gem.description = %Q{Statsample-GLM is an extension to Statsample, an advance statistics suite in Ruby. This gem includes modules for Regression techniques such as Poisson Regression, Logistic Regression and Exponential Regression}
-  gem.email = "ankurgel@gmail.com"
-  gem.authors = ["Ankur Goel"]
-  # dependencies defined in Gemfile
+# Interactive console: puts this checkout's lib/ directory at the front of the
+# load path, requires statsample-glm, and starts IRB.
+desc "Open IRB with statsample-glm loaded."
+task :console do
+  require 'irb'
+  require 'irb/completion'
+  $:.unshift File.expand_path("../lib", __FILE__)
+  require 'statsample-glm'
+  # Empty ARGV so IRB does not try to interpret rake's arguments as its own.
+  ARGV.clear
+  IRB.start
 end
-Jeweler::RubygemsDotOrgTasks.new
-require 'rake/testtask'
-Rake::TestTask.new(:test) do |test|
-  test.libs << 'lib' << 'test'
-  test.pattern = 'test/**/test_*.rb'
-  test.verbose = true
-end
-require 'cucumber/rake/task'
-Cucumber::Rake::Task.new(:features)
+require 'rspec/core/rake_task'
+RSpec::Core::RakeTask.new(:spec)
-task :default => :test
+task :default => :spec
 require 'rdoc/task'
 Rake::RDocTask.new do |rdoc|
-  version = File.exist?('VERSION') ? File.read('VERSION') : ""
+  $:.unshift File.expand_path("../lib", __FILE__)
+  version = Statsample::GLM::VERSION
   rdoc.rdoc_dir = 'rdoc'
   rdoc.title = "statsample-glm #{version}"

data/lib/statsample-glm.rb CHANGED

@@ -1,12 +1,2 @@
-# Please require your code below, respecting the naming conventions in the
-# bioruby directory tree.
-#
-# For example, say you have a plugin named bio-plugin, the only uncommented
-# line in this file would be
-#
-#   require 'bio/bio-plugin/plugin'
-#
-# In this file only require other files. Avoid other source code.
 require 'statsample'
-require 'statsample-glm/regression'
+require 'statsample-glm/glm'

data/lib/statsample-glm/{regression.rb → glm.rb} RENAMED

@@ -1,7 +1,10 @@
-require 'statsample-glm/regression/poisson'
-require 'statsample-glm/regression/logistic'
+require 'statsample-glm/glm/logistic'
+require 'statsample-glm/glm/probit'
+require 'statsample-glm/glm/poisson'
+require 'statsample-glm/glm/normal'
 module Statsample
-  module Regression
+  module GLM
     include Statsample::VectorShorthands
     # = Generalized linear models
@@ -15,55 +18,19 @@ module Statsample
     #    require 'statsample-glm'
     #    x1=Statsample::Vector.new([0.537322309644812,-0.717124209978434,-0.519166718891331,0.434970973986765,-0.761822002215759,1.51170030921189,0.883854199811195,-0.908689798854196,1.70331977539793,-0.246971150634099,-1.59077593922623,-0.721548040910253,0.467025703920194,-0.510132788447137,0.430106510266798,-0.144353683251536,-1.54943800728303,0.849307651309298,-0.640304240933579,1.31462478279425,-0.399783455165345,0.0453055645017902,-2.58212161987746,-1.16484414309359,-1.08829266466281,-0.243893919684792,-1.96655661929441,0.301335373291024,-0.665832694463588,-0.0120650855753837,1.5116066367604,0.557300353673344,1.12829931872045,0.234443748015922,-2.03486690662651,0.275544751380246,-0.231465849558696,-0.356880153225012,-0.57746647541923,1.35758352580655,1.23971669378224,-0.662466275100489,0.313263561921793,-1.08783223256362,1.41964722846899,1.29325100940785,0.72153880625103,0.440580131022748,0.0351917814720056, -0.142353224879252],:scale)
     #    x2=Statsample::Vector.new([-0.866655707911859,-0.367820249977585,0.361486610435,0.857332626245179,0.133438466268095,0.716104533073575,1.77206093023382,-0.10136697295802,-0.777086491435508,-0.204573554913706,0.963353531412233,-1.10103024900542,-0.404372761837392,-0.230226345183469,0.0363730246866971,-0.838265540390497,1.12543549657924,-0.57929175648001,-0.747060244805248,0.58946979365152,-0.531952663697324,1.53338594419818,0.521992029051441,1.41631763288724,0.611402316795129,-0.518355638373296,-0.515192557101107,-0.672697937866108,1.84347042325327,-0.21195540664804,-0.269869371631611,0.296155694010096,-2.18097898069634,-1.21314663927206,1.49193669881581,1.38969280369493,-0.400680808117106,-1.87282814976479,1.82394870451051,0.637864732838274,-0.141155946382493,0.0699950644281617,1.32568550595165,-0.412599258349398,0.14436832227506,-1.16507785388489,-2.16782049922428,0.24318371493798,0.258954871320764,-0.151966534521183],:scale)
-    #    y=Statsample::Vector.new([0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1],:scale)
-    #    x=Statsample::Dataset.new({"i"=>intercept,"x1"=>x1,"x2"=>x2})
-    #    obj = Statsample::Regression.glm(x, y, :binomial)
+    #    x=Statsample::Dataset.new({"x1"=>x1,"x2"=>x2})
+    #    obj = Statsample::GLM.compute(x, y, :logit, {algorithm: :irls})
     #    #=> Logistic Regression object
     #
     # == Returns
     #    GLM object for given method.
-    def self.glm(x, y, method=:gaussian)
-      if method.downcase.to_sym == :poisson
-        obj = Statsample::Regression::GLM::Poisson.new(x,y)
-      elsif method.downcase.to_sym == :binomial
-        obj = Statsample::Regression::GLM::Logistic.new(x,y)
-      else
-        raise("Not implemented yet")
-      end
-      obj.irwls
-      obj
-    end
-    def self.irwls(x, y, mu, w, j, h, epsilon = 1e-7, max_iter = 100)
-      b = Matrix.column_vector(Array.new(x.column_size,0.0))
-      converged = false
-      1.upto(max_iter) do |i|
-        #conversion from : (solve(j(x,b)) %*% h(x,b,y))
+    def self.compute(data_set, dependent_column, method, opts={})
+      opts[:method] = method
-        intermediate = (j.call(x,b).inverse * h.call(x,b,y))
-        b_new = b - intermediate
-        if((b_new - b).map(&:abs)).to_a.flatten.inject(:+) < epsilon
-          converged = true
-          b = b_new
-          break
-        end
-        b = b_new
-      end
-      ss = j.call(x,b).inverse.diagonal.map{ |x| -x}.map{ |y| Math.sqrt(y) }
-      values = mu.call(x,b)
-      residuals = y - values.column_vectors.map(&:to_a).flatten
-      df_residuals = y.count - x.column_size
-      return [create_vector(b.column_vectors[0]), create_vector(ss), create_vector(values.to_a.flatten),
-              residuals, max_iter, df_residuals, converged]
-    end
+      # TODO: Remove this const_get jugaad after 1.9.3 support is removed.
-    private
-    def self.create_vector(arr)
-      Statsample::Vector.new(arr, :scale)
+      Kernel.const_get("Statsample").const_get("GLM").const_get("#{method.capitalize}").new data_set,
+        dependent_column, opts
     end
   end
 end

data/lib/statsample-glm/glm/base.rb ADDED

@@ -0,0 +1,99 @@
+require 'statsample-glm/glm/irls/logistic'
+require 'statsample-glm/glm/irls/poisson'
+require 'statsample-glm/glm/mle/logistic'
+require 'statsample-glm/glm/mle/probit'
+require 'statsample-glm/glm/mle/normal'
+module Statsample
+  module GLM
+    # Common front-end for the GLM model classes. It splits the data set into
+    # predictors and the dependent vector, optionally adds a "constant" column,
+    # and delegates the fit to Statsample::GLM::<ALGORITHM>::<Method>, whose
+    # results are exposed through the reader methods below.
+    class Base
+      # ds   - data set responding to #dup(fields), #fields and #[].
+      # y    - name of the dependent column (converted with #to_s).
+      # opts - options hash; :method is required, while :algorithm, :iterations,
+      #        :epsilon and :link get defaults (see set_default_opts_if_any).
+      #        :constant, when truthy, is the value of an added "constant" column.
+      #        NOTE: defaults are written into the hash that was passed in.
+      def initialize ds, y, opts={}
+        @opts   = opts
+        set_default_opts_if_any
+        @data_set  = ds.dup(ds.fields - [y.to_s])
+        @dependent = ds[y.to_s]
+        add_constant_vector if @opts[:constant]
+        # Normal models always get a constant column of ones.
+        add_constant_vector(1) if self.is_a? Statsample::GLM::Normal
+        algorithm = @opts[:algorithm].upcase
+        method    = @opts[:method].capitalize
+        # TODO: Remove this const_get jugaad after 1.9.3 support is removed.
+        @regression = Kernel.const_get("Statsample").const_get("GLM")
+                            .const_get("#{algorithm}").const_get("#{method}")
+                            .new(@data_set, @dependent, @opts)
+      end
+      # Estimated coefficients. With as_a == :hash returns a Hash keyed by the
+      # predictor field names (as symbols); otherwise a :scale Statsample::Vector.
+      def coefficients as_a=:array
+        return keyed_by_field(@regression.coefficients) if as_a == :hash
+        create_vector @regression.coefficients
+      end
+      # Standard errors of the coefficients, in the same two shapes as
+      # #coefficients.
+      def standard_error as_a=:array
+        return keyed_by_field(@regression.standard_error) if as_a == :hash
+        create_vector @regression.standard_error
+      end
+      # Iteration count as reported by the underlying regression object.
+      def iterations
+        @regression.iterations
+      end
+      # Fitted mean values as reported by the underlying regression object.
+      def fitted_mean_values
+        @regression.fitted_mean_values
+      end
+      # Residuals as reported by the underlying regression object.
+      def residuals
+        @regression.residuals
+      end
+      # Degrees of freedom as reported by the underlying regression object.
+      def degree_of_freedom
+        @regression.degree_of_freedom
+      end
+      # Log-likelihood of the fit; nil unless the :mle algorithm was used.
+      # The comparison ignores case and String/Symbol differences because
+      # #initialize accepts any of them (it only calls #upcase on the option).
+      def log_likelihood
+        @regression.log_likelihood if @opts[:algorithm].to_s.downcase == 'mle'
+      end
+      private
+      # Fills in defaults for options the caller did not supply.
+      def set_default_opts_if_any
+        @opts[:algorithm]  ||= :irls
+        @opts[:iterations] ||= 100
+        @opts[:epsilon]    ||= 1e-7
+        @opts[:link]       ||= :log
+      end
+      # Wraps an array-like in a :scale Statsample::Vector.
+      def create_vector arr
+        Statsample::Vector.new(arr, :scale)
+      end
+      # Builds { field_name_sym => values[i] } following the predictor order.
+      def keyed_by_field values
+        result = {}
+        @data_set.fields.each_with_index do |f,i|
+          result[f.to_sym] = values[i]
+        end
+        result
+      end
+      # Adds a "constant" column to the predictors. Without an argument the
+      # column holds @opts[:constant]; with any non-nil argument it holds 1.
+      # The previous version always added the @opts[:constant] column first and
+      # then overwrote it with ones; this adds only the final column.
+      # NOTE(review): assumes add_vector replaces an existing vector of the
+      # same name, which the earlier double-add also relied on - confirm.
+      def add_constant_vector x=nil
+        value = x.nil? ? @opts[:constant] : 1
+        @data_set.add_vector "constant",
+          (([value]*@data_set.cases).to_vector(:scale))
+      end
+    end
+  end
+end

data/lib/statsample-glm/glm/irls/base.rb ADDED

@@ -0,0 +1,54 @@
+module Statsample
+  module GLM
+    module IRLS
+      # Shared iteratively reweighted least squares driver. This class calls
+      # hessian(x, b), jacobian(x, b, y) and measurement(x, b), none of which
+      # are defined here; presumably the concrete IRLS models supply them.
+      class Base
+        attr_reader :coefficients, :standard_error, :iterations,
+          :fitted_mean_values, :residuals, :degree_of_freedom
+        # data_set  - predictors; converted with #to_matrix.
+        # dependent - dependent vector.
+        # opts      - must provide :iterations (maximum number of update steps)
+        #             and :epsilon (convergence threshold).
+        # The fit runs immediately on construction.
+        def initialize data_set, dependent, opts={}
+          @data_set  = data_set.to_matrix
+          @dependent = dependent
+          @opts      = opts
+          irls
+        end
+       private
+        # Repeats the update b <- b - hessian^-1 * jacobian, starting from a
+        # zero vector, until the summed absolute change in b falls below
+        # :epsilon or :iterations steps have been taken, then stores the results.
+        def irls
+          max_iter   = @opts[:iterations]
+          b          = Matrix.column_vector Array.new(@data_set.column_size,0.0)
+          # Count of update steps actually taken; previously #iterations always
+          # reported max_iter, even when the loop converged early.
+          performed  = 0
+          1.upto(max_iter) do |i|
+            performed = i
+            step      = (hessian(@data_set,b).inverse *
+                         jacobian(@data_set, b, @dependent))
+            b_new     = b - step
+            converged = (b_new - b).map(&:abs).to_a.flatten.inject(:+) < @opts[:epsilon]
+            b         = b_new
+            break if converged
+          end
+          @coefficients       = create_vector(b.column_vectors[0])
+          @iterations         = performed
+          # Square roots of the negated diagonal of the inverse Hessian.
+          @standard_error     = hessian(@data_set,b).inverse
+                                                    .diagonal
+                                                    .map{ |v| Math.sqrt(-v) }
+          @fitted_mean_values = create_vector measurement(@data_set,b).to_a.flatten
+          @residuals          = @dependent - @fitted_mean_values
+          @degree_of_freedom  = @dependent.count - @data_set.column_size
+        end
+        # Wraps an array-like in a :scale Statsample::Vector.
+        def create_vector arr
+          Statsample::Vector.new(arr, :scale)
+        end
+      end
+    end
+  end
+end