RubyGems - bio-statsample-glm - Versions diffs - 0.1.0 → 0.1.1 - Mend

bio-statsample-glm 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

data/VERSION +1 -1
data/lib/bio-statsample-glm/regression.rb +5 -2
data/lib/bio-statsample-glm/regression/logistic.rb +20 -6
data/lib/bio-statsample-glm/regression/poisson.rb +19 -5
data/test/helper.rb +8 -0
data/test/test_glm.rb +0 -33
data/test/test_glm_logistic.rb +23 -0
data/test/test_glm_poisson.rb +25 -0
metadata +4 -2

data/VERSION CHANGED

	@@ -1 +1 @@
1	- 0.1.0
1	+ 0.1.1

data/lib/bio-statsample-glm/regression.rb CHANGED

@@ -22,14 +22,17 @@ module Statsample
     #
     # == Returns
     #    GLM object for given method.
-    def self.glm(x, y, method=:poisson)
+    def self.glm(x, y, method=:gaussian)
       if method.downcase.to_sym == :poisson
         obj = Statsample::Regression::GLM::Poisson.new(x,y)
       elsif method.downcase.to_sym == :binomial
         obj = Statsample::Regression::GLM::Logistic.new(x,y)
+      else
+        raise("Not implemented yet")
       end
+      obj.irwls
       obj
-      #now, #irwls method is available to be called on returned obj
     end

data/lib/bio-statsample-glm/regression/logistic.rb CHANGED

@@ -4,9 +4,6 @@ module Statsample
       class Logistic
-        # a named vector of coefficients
-        attr_reader :coefficients
-        #
         attr_reader :se
         # The fitted mean values
         attr_reader :fit
@@ -19,11 +16,28 @@ module Statsample
         # Boolean. Tells whether the IRWLS for the given model converged or not
         attr_reader :converged
-        def initialize(x, y)
-          @x = x
+        def initialize(ds, y)
+          @ds=ds
+          @fields=@ds.fields
+          @x = ds.to_matrix
           @y = y
         end
+        # named vector/hash of coefficients
+        # === Parameter
+        # * *type*: symbol; (:array, default). Options = [:array, :hash]
+        def coefficients(type=:array)
+          if type==:array
+            #originally returned as vector; so pass it
+            @coefficients
+          elsif type==:hash
+            h={}
+            @fields.size.times {|i|
+              h[@fields[i]]=@coefficients[i]
+            }
+            h
+          end
+        end
         def self.mu(x, b)
           matrix_mul = x * b
           numerator = matrix_mul.map { |y| Math.exp(y) }
@@ -91,4 +105,4 @@ module Statsample
     end
   end
-end
+end

data/lib/bio-statsample-glm/regression/poisson.rb CHANGED

@@ -4,9 +4,6 @@ module Statsample
       class Poisson
-        # a named vector of coefficients
-        attr_reader :coefficients
-        #
         attr_reader :se
         # The fitted mean values
         attr_reader :fit
@@ -19,11 +16,28 @@ module Statsample
         # Boolean. Tells whether the IRWLS for the given model converged or not
         attr_reader :converged
-        def initialize(x, y)
-          @x = x
+        def initialize(ds, y)
+          @ds=ds
+          @fields=@ds.fields
+          @x = ds.to_matrix
           @y = y
         end
+        # named vector/hash of coefficients
+        # === Parameter
+        # * *type*: symbol; (:array, default). Options = [:array, :hash]
+        def coefficients(type=:array)
+          if type==:array
+            @coefficients
+          elsif type==:hash
+            h={}
+            @fields.size.times {|i|
+              h[@fields[i]]=@coefficients[i]
+            }
+            h
+          end
+        end
         def self.mu(x, b, link=:log)
           if link.downcase.to_sym == :log
             (x * b).map { |y| Math.exp(y) }

data/test/helper.rb CHANGED

@@ -42,6 +42,14 @@ module MiniTest
         assert_in_delta(v,obs[i],delta)
       }
     end
+    def assert_similar_hash(exp, obs, delta=1e-10,msg=nil)
+      msg||="Different hash #{exp} - #{obs}"
+      assert_equal(exp.size, obs.size)
+      exp.each_key {|k|
+        assert_in_delta(exp[k],obs[k],delta)
+      }
+    end
     def assert_equal_vector(exp,obs,delta=1e-10,msg=nil)
       assert_equal(exp.size, obs.size, "Different size.#{msg}")
       exp.size.times {|i|

data/test/test_glm.rb CHANGED

@@ -1,37 +1,4 @@
 require(File.expand_path(File.dirname(__FILE__)+'/helper.rb'))
 class StatsampleRegressionGlm < MiniTest::Unit::TestCase
-  context("Example") do
-    setup do
-      x1=Statsample::Vector.new([0.537322309644812,-0.717124209978434,-0.519166718891331,0.434970973986765,-0.761822002215759,1.51170030921189,0.883854199811195,-0.908689798854196,1.70331977539793,-0.246971150634099,-1.59077593922623,-0.721548040910253,0.467025703920194,-0.510132788447137,0.430106510266798,-0.144353683251536,-1.54943800728303,0.849307651309298,-0.640304240933579,1.31462478279425,-0.399783455165345,0.0453055645017902,-2.58212161987746,-1.16484414309359,-1.08829266466281,-0.243893919684792,-1.96655661929441,0.301335373291024,-0.665832694463588,-0.0120650855753837,1.5116066367604,0.557300353673344,1.12829931872045,0.234443748015922,-2.03486690662651,0.275544751380246,-0.231465849558696,-0.356880153225012,-0.57746647541923,1.35758352580655,1.23971669378224,-0.662466275100489,0.313263561921793,-1.08783223256362,1.41964722846899,1.29325100940785,0.72153880625103,0.440580131022748,0.0351917814720056, -0.142353224879252],:scale)
-      x2=Statsample::Vector.new([-0.866655707911859,-0.367820249977585,0.361486610435,0.857332626245179,0.133438466268095,0.716104533073575,1.77206093023382,-0.10136697295802,-0.777086491435508,-0.204573554913706,0.963353531412233,-1.10103024900542,-0.404372761837392,-0.230226345183469,0.0363730246866971,-0.838265540390497,1.12543549657924,-0.57929175648001,-0.747060244805248,0.58946979365152,-0.531952663697324,1.53338594419818,0.521992029051441,1.41631763288724,0.611402316795129,-0.518355638373296,-0.515192557101107,-0.672697937866108,1.84347042325327,-0.21195540664804,-0.269869371631611,0.296155694010096,-2.18097898069634,-1.21314663927206,1.49193669881581,1.38969280369493,-0.400680808117106,-1.87282814976479,1.82394870451051,0.637864732838274,-0.141155946382493,0.0699950644281617,1.32568550595165,-0.412599258349398,0.14436832227506,-1.16507785388489,-2.16782049922428,0.24318371493798,0.258954871320764,-0.151966534521183],:scale)
-      @y_log=Statsample::Vector.new([0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1],:scale)
-      @y_pois=Statsample::Vector.new([1,2,1,3,3,1,10,1,1,2,15,0,0,2,1,2,18,2,1,1,1,8,18,13,7,1,1,0,26,0,2,2,0,0,25,7,0,0,21,0,0,1,5,0,3,0,0,1,0,0],:scale)
-      intercept=Statsample::Vector.new([1]*50,:scale)
-      @df=Statsample::Dataset.new({"i"=>intercept,"x1"=>x1,"x2"=>x2})
-    end
-    context("Logistic") do
-      setup do
-        @glm=Statsample::Regression.glm(@df.to_matrix,@y_log,:binomial)
-        @glm.irwls
-      end
-      should "report correct coefficientes" do
-        assert_similar_vector(@glm.coefficients,[0.675603176233325,-0.312493754568903,2.28671333346264])
-      end
-    end
-    context("Poisson") do
-      setup do
-        @glm=Statsample::Regression.glm(@df.to_matrix,@y_pois,:poisson)
-        @glm.irwls
-      end
-      should "report correct coefficientes" do
-        assert_similar_vector(@glm.coefficients,[0.32993246633711,-0.586359358356708,1.28511323439258])
-      end
-    end
-  end
 end

data/test/test_glm_logistic.rb ADDED

@@ -0,0 +1,23 @@
+require(File.expand_path(File.dirname(__FILE__)+'/helper.rb'))
+class StatsampleRegressionGlmLogistic < MiniTest::Unit::TestCase
+  context("Example") do
+    setup do
+      x1=Statsample::Vector.new([0.537322309644812,-0.717124209978434,-0.519166718891331,0.434970973986765,-0.761822002215759,1.51170030921189,0.883854199811195,-0.908689798854196,1.70331977539793,-0.246971150634099,-1.59077593922623,-0.721548040910253,0.467025703920194,-0.510132788447137,0.430106510266798,-0.144353683251536,-1.54943800728303,0.849307651309298,-0.640304240933579,1.31462478279425,-0.399783455165345,0.0453055645017902,-2.58212161987746,-1.16484414309359,-1.08829266466281,-0.243893919684792,-1.96655661929441,0.301335373291024,-0.665832694463588,-0.0120650855753837,1.5116066367604,0.557300353673344,1.12829931872045,0.234443748015922,-2.03486690662651,0.275544751380246,-0.231465849558696,-0.356880153225012,-0.57746647541923,1.35758352580655,1.23971669378224,-0.662466275100489,0.313263561921793,-1.08783223256362,1.41964722846899,1.29325100940785,0.72153880625103,0.440580131022748,0.0351917814720056, -0.142353224879252],:scale)
+      x2=Statsample::Vector.new([-0.866655707911859,-0.367820249977585,0.361486610435,0.857332626245179,0.133438466268095,0.716104533073575,1.77206093023382,-0.10136697295802,-0.777086491435508,-0.204573554913706,0.963353531412233,-1.10103024900542,-0.404372761837392,-0.230226345183469,0.0363730246866971,-0.838265540390497,1.12543549657924,-0.57929175648001,-0.747060244805248,0.58946979365152,-0.531952663697324,1.53338594419818,0.521992029051441,1.41631763288724,0.611402316795129,-0.518355638373296,-0.515192557101107,-0.672697937866108,1.84347042325327,-0.21195540664804,-0.269869371631611,0.296155694010096,-2.18097898069634,-1.21314663927206,1.49193669881581,1.38969280369493,-0.400680808117106,-1.87282814976479,1.82394870451051,0.637864732838274,-0.141155946382493,0.0699950644281617,1.32568550595165,-0.412599258349398,0.14436832227506,-1.16507785388489,-2.16782049922428,0.24318371493798,0.258954871320764,-0.151966534521183],:scale)
+      @y_log=Statsample::Vector.new([0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1],:scale)
+      @y_pois=Statsample::Vector.new([1,2,1,3,3,1,10,1,1,2,15,0,0,2,1,2,18,2,1,1,1,8,18,13,7,1,1,0,26,0,2,2,0,0,25,7,0,0,21,0,0,1,5,0,3,0,0,1,0,0],:scale)
+      intercept=Statsample::Vector.new([1]*50,:scale)
+      @df=Statsample::Dataset.new({"i"=>intercept,"x1"=>x1,"x2"=>x2})
+      @glm=Statsample::Regression.glm(@df,@y_log,:binomial)
+    end
+    should "report correct coefficientes as array" do
+      assert_similar_vector(@glm.coefficients,[0.675603176233325,-0.312493754568903,2.28671333346264])
+    end
+    should "report correct coefficientes as hash" do
+      assert_similar_hash(@glm.coefficients(:hash), {"i"=>0.675603176233325,"x1"=>-0.312493754568903,"x2"=>2.28671333346264})
+    end
+  end
+end

data/test/test_glm_poisson.rb ADDED

@@ -0,0 +1,25 @@
+require(File.expand_path(File.dirname(__FILE__)+'/helper.rb'))
+class StatsampleRegressionGlmPoisson < MiniTest::Unit::TestCase
+  context("Example") do
+    setup do
+      x1=Statsample::Vector.new([0.537322309644812,-0.717124209978434,-0.519166718891331,0.434970973986765,-0.761822002215759,1.51170030921189,0.883854199811195,-0.908689798854196,1.70331977539793,-0.246971150634099,-1.59077593922623,-0.721548040910253,0.467025703920194,-0.510132788447137,0.430106510266798,-0.144353683251536,-1.54943800728303,0.849307651309298,-0.640304240933579,1.31462478279425,-0.399783455165345,0.0453055645017902,-2.58212161987746,-1.16484414309359,-1.08829266466281,-0.243893919684792,-1.96655661929441,0.301335373291024,-0.665832694463588,-0.0120650855753837,1.5116066367604,0.557300353673344,1.12829931872045,0.234443748015922,-2.03486690662651,0.275544751380246,-0.231465849558696,-0.356880153225012,-0.57746647541923,1.35758352580655,1.23971669378224,-0.662466275100489,0.313263561921793,-1.08783223256362,1.41964722846899,1.29325100940785,0.72153880625103,0.440580131022748,0.0351917814720056, -0.142353224879252],:scale)
+      x2=Statsample::Vector.new([-0.866655707911859,-0.367820249977585,0.361486610435,0.857332626245179,0.133438466268095,0.716104533073575,1.77206093023382,-0.10136697295802,-0.777086491435508,-0.204573554913706,0.963353531412233,-1.10103024900542,-0.404372761837392,-0.230226345183469,0.0363730246866971,-0.838265540390497,1.12543549657924,-0.57929175648001,-0.747060244805248,0.58946979365152,-0.531952663697324,1.53338594419818,0.521992029051441,1.41631763288724,0.611402316795129,-0.518355638373296,-0.515192557101107,-0.672697937866108,1.84347042325327,-0.21195540664804,-0.269869371631611,0.296155694010096,-2.18097898069634,-1.21314663927206,1.49193669881581,1.38969280369493,-0.400680808117106,-1.87282814976479,1.82394870451051,0.637864732838274,-0.141155946382493,0.0699950644281617,1.32568550595165,-0.412599258349398,0.14436832227506,-1.16507785388489,-2.16782049922428,0.24318371493798,0.258954871320764,-0.151966534521183],:scale)
+      @y_log=Statsample::Vector.new([0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1],:scale)
+      @y_pois=Statsample::Vector.new([1,2,1,3,3,1,10,1,1,2,15,0,0,2,1,2,18,2,1,1,1,8,18,13,7,1,1,0,26,0,2,2,0,0,25,7,0,0,21,0,0,1,5,0,3,0,0,1,0,0],:scale)
+      intercept=Statsample::Vector.new([1]*50,:scale)
+      @df=Statsample::Dataset.new({"i"=>intercept,"x1"=>x1,"x2"=>x2})
+      @glm=Statsample::Regression.glm(@df,@y_pois,:poisson)
+    end
+    should "report correct coefficientes as array" do
+      assert_similar_vector(@glm.coefficients,[0.32993246633711,-0.586359358356708,1.28511323439258])
+    end # should
+    should "report correct coefficientes as hash" do
+      assert_similar_hash(@glm.coefficients(:hash), {"i"=>0.32993246633711,"x1"=>-0.586359358356708, "x2"=>1.28511323439258})
+    end # should
+  end # context
+end # class

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: bio-statsample-glm
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.1.1
   prerelease:
 platform: ruby
 authors:
@@ -215,6 +215,8 @@ files:
 - lib/bio-statsample-glm/regression/poisson.rb
 - test/helper.rb
 - test/test_glm.rb
+- test/test_glm_logistic.rb
+- test/test_glm_poisson.rb
 homepage: http://github.com/AnkurGel/bioruby-statsample-glm
 licenses:
 - MIT
@@ -230,7 +232,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
       version: '0'
       segments:
       - 0
-      hash: 797002845
+      hash: -988046223
 required_rubygems_version: !ruby/object:Gem::Requirement
   none: false
   requirements: