bio-statsample-glm 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0
1
+ 0.1.1
@@ -22,14 +22,17 @@ module Statsample
22
22
  #
23
23
  # == Returns
24
24
  # GLM object for given method.
25
- def self.glm(x, y, method=:poisson)
25
+ def self.glm(x, y, method=:gaussian)
26
+
26
27
  if method.downcase.to_sym == :poisson
27
28
  obj = Statsample::Regression::GLM::Poisson.new(x,y)
28
29
  elsif method.downcase.to_sym == :binomial
29
30
  obj = Statsample::Regression::GLM::Logistic.new(x,y)
31
+ else
32
+ raise("Not implemented yet")
30
33
  end
34
+ obj.irwls
31
35
  obj
32
- #now, #irwls method is available to be called on returned obj
33
36
  end
34
37
 
35
38
 
@@ -4,9 +4,6 @@ module Statsample
4
4
 
5
5
  class Logistic
6
6
 
7
- # a named vector of coefficients
8
- attr_reader :coefficients
9
- #
10
7
  attr_reader :se
11
8
  # The fitted mean values
12
9
  attr_reader :fit
@@ -19,11 +16,28 @@ module Statsample
19
16
  # Boolean. Tells whether the IRWLS for the given model converged or not
20
17
  attr_reader :converged
21
18
 
22
- def initialize(x, y)
23
- @x = x
19
+ def initialize(ds, y)
20
+ @ds=ds
21
+ @fields=@ds.fields
22
+ @x = ds.to_matrix
24
23
  @y = y
25
24
  end
26
25
 
26
+ # named vector/hash of coefficients
27
+ # === Parameter
28
+ # * *type*: symbol; (:array, default). Options = [:array, :hash]
29
+ def coefficients(type=:array)
30
+ if type==:array
31
+ #originally returned as vector; so pass it
32
+ @coefficients
33
+ elsif type==:hash
34
+ h={}
35
+ @fields.size.times {|i|
36
+ h[@fields[i]]=@coefficients[i]
37
+ }
38
+ h
39
+ end
40
+ end
27
41
  def self.mu(x, b)
28
42
  matrix_mul = x * b
29
43
  numerator = matrix_mul.map { |y| Math.exp(y) }
@@ -91,4 +105,4 @@ module Statsample
91
105
 
92
106
  end
93
107
  end
94
- end
108
+ end
@@ -4,9 +4,6 @@ module Statsample
4
4
 
5
5
  class Poisson
6
6
 
7
- # a named vector of coefficients
8
- attr_reader :coefficients
9
- #
10
7
  attr_reader :se
11
8
  # The fitted mean values
12
9
  attr_reader :fit
@@ -19,11 +16,28 @@ module Statsample
19
16
  # Boolean. Tells whether the IRWLS for the given model converged or not
20
17
  attr_reader :converged
21
18
 
22
- def initialize(x, y)
23
- @x = x
19
+ def initialize(ds, y)
20
+ @ds=ds
21
+ @fields=@ds.fields
22
+ @x = ds.to_matrix
24
23
  @y = y
25
24
  end
26
25
 
26
+ # named vector/hash of coefficients
27
+ # === Parameter
28
+ # * *type*: symbol; (:array, default). Options = [:array, :hash]
29
+ def coefficients(type=:array)
30
+ if type==:array
31
+ @coefficients
32
+ elsif type==:hash
33
+ h={}
34
+ @fields.size.times {|i|
35
+ h[@fields[i]]=@coefficients[i]
36
+ }
37
+ h
38
+ end
39
+ end
40
+
27
41
  def self.mu(x, b, link=:log)
28
42
  if link.downcase.to_sym == :log
29
43
  (x * b).map { |y| Math.exp(y) }
@@ -42,6 +42,14 @@ module MiniTest
42
42
  assert_in_delta(v,obs[i],delta)
43
43
  }
44
44
  end
45
+ def assert_similar_hash(exp, obs, delta=1e-10,msg=nil)
46
+ msg||="Different hash #{exp} - #{obs}"
47
+ assert_equal(exp.size, obs.size)
48
+ exp.each_key {|k|
49
+ assert_in_delta(exp[k],obs[k],delta)
50
+ }
51
+ end
52
+
45
53
  def assert_equal_vector(exp,obs,delta=1e-10,msg=nil)
46
54
  assert_equal(exp.size, obs.size, "Different size.#{msg}")
47
55
  exp.size.times {|i|
@@ -1,37 +1,4 @@
1
1
  require(File.expand_path(File.dirname(__FILE__)+'/helper.rb'))
2
2
 
3
3
  class StatsampleRegressionGlm < MiniTest::Unit::TestCase
4
-
5
- context("Example") do
6
- setup do
7
- x1=Statsample::Vector.new([0.537322309644812,-0.717124209978434,-0.519166718891331,0.434970973986765,-0.761822002215759,1.51170030921189,0.883854199811195,-0.908689798854196,1.70331977539793,-0.246971150634099,-1.59077593922623,-0.721548040910253,0.467025703920194,-0.510132788447137,0.430106510266798,-0.144353683251536,-1.54943800728303,0.849307651309298,-0.640304240933579,1.31462478279425,-0.399783455165345,0.0453055645017902,-2.58212161987746,-1.16484414309359,-1.08829266466281,-0.243893919684792,-1.96655661929441,0.301335373291024,-0.665832694463588,-0.0120650855753837,1.5116066367604,0.557300353673344,1.12829931872045,0.234443748015922,-2.03486690662651,0.275544751380246,-0.231465849558696,-0.356880153225012,-0.57746647541923,1.35758352580655,1.23971669378224,-0.662466275100489,0.313263561921793,-1.08783223256362,1.41964722846899,1.29325100940785,0.72153880625103,0.440580131022748,0.0351917814720056, -0.142353224879252],:scale)
8
- x2=Statsample::Vector.new([-0.866655707911859,-0.367820249977585,0.361486610435,0.857332626245179,0.133438466268095,0.716104533073575,1.77206093023382,-0.10136697295802,-0.777086491435508,-0.204573554913706,0.963353531412233,-1.10103024900542,-0.404372761837392,-0.230226345183469,0.0363730246866971,-0.838265540390497,1.12543549657924,-0.57929175648001,-0.747060244805248,0.58946979365152,-0.531952663697324,1.53338594419818,0.521992029051441,1.41631763288724,0.611402316795129,-0.518355638373296,-0.515192557101107,-0.672697937866108,1.84347042325327,-0.21195540664804,-0.269869371631611,0.296155694010096,-2.18097898069634,-1.21314663927206,1.49193669881581,1.38969280369493,-0.400680808117106,-1.87282814976479,1.82394870451051,0.637864732838274,-0.141155946382493,0.0699950644281617,1.32568550595165,-0.412599258349398,0.14436832227506,-1.16507785388489,-2.16782049922428,0.24318371493798,0.258954871320764,-0.151966534521183],:scale)
9
- @y_log=Statsample::Vector.new([0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1],:scale)
10
- @y_pois=Statsample::Vector.new([1,2,1,3,3,1,10,1,1,2,15,0,0,2,1,2,18,2,1,1,1,8,18,13,7,1,1,0,26,0,2,2,0,0,25,7,0,0,21,0,0,1,5,0,3,0,0,1,0,0],:scale)
11
- intercept=Statsample::Vector.new([1]*50,:scale)
12
- @df=Statsample::Dataset.new({"i"=>intercept,"x1"=>x1,"x2"=>x2})
13
- end
14
-
15
- context("Logistic") do
16
- setup do
17
- @glm=Statsample::Regression.glm(@df.to_matrix,@y_log,:binomial)
18
- @glm.irwls
19
- end
20
-
21
- should "report correct coefficientes" do
22
- assert_similar_vector(@glm.coefficients,[0.675603176233325,-0.312493754568903,2.28671333346264])
23
- end
24
- end
25
-
26
- context("Poisson") do
27
- setup do
28
- @glm=Statsample::Regression.glm(@df.to_matrix,@y_pois,:poisson)
29
- @glm.irwls
30
- end
31
-
32
- should "report correct coefficientes" do
33
- assert_similar_vector(@glm.coefficients,[0.32993246633711,-0.586359358356708,1.28511323439258])
34
- end
35
- end
36
- end
37
4
  end
@@ -0,0 +1,23 @@
1
+ require(File.expand_path(File.dirname(__FILE__)+'/helper.rb'))
2
+
3
+ class StatsampleRegressionGlmLogistic < MiniTest::Unit::TestCase
4
+
5
+ context("Example") do
6
+ setup do
7
+ x1=Statsample::Vector.new([0.537322309644812,-0.717124209978434,-0.519166718891331,0.434970973986765,-0.761822002215759,1.51170030921189,0.883854199811195,-0.908689798854196,1.70331977539793,-0.246971150634099,-1.59077593922623,-0.721548040910253,0.467025703920194,-0.510132788447137,0.430106510266798,-0.144353683251536,-1.54943800728303,0.849307651309298,-0.640304240933579,1.31462478279425,-0.399783455165345,0.0453055645017902,-2.58212161987746,-1.16484414309359,-1.08829266466281,-0.243893919684792,-1.96655661929441,0.301335373291024,-0.665832694463588,-0.0120650855753837,1.5116066367604,0.557300353673344,1.12829931872045,0.234443748015922,-2.03486690662651,0.275544751380246,-0.231465849558696,-0.356880153225012,-0.57746647541923,1.35758352580655,1.23971669378224,-0.662466275100489,0.313263561921793,-1.08783223256362,1.41964722846899,1.29325100940785,0.72153880625103,0.440580131022748,0.0351917814720056, -0.142353224879252],:scale)
8
+ x2=Statsample::Vector.new([-0.866655707911859,-0.367820249977585,0.361486610435,0.857332626245179,0.133438466268095,0.716104533073575,1.77206093023382,-0.10136697295802,-0.777086491435508,-0.204573554913706,0.963353531412233,-1.10103024900542,-0.404372761837392,-0.230226345183469,0.0363730246866971,-0.838265540390497,1.12543549657924,-0.57929175648001,-0.747060244805248,0.58946979365152,-0.531952663697324,1.53338594419818,0.521992029051441,1.41631763288724,0.611402316795129,-0.518355638373296,-0.515192557101107,-0.672697937866108,1.84347042325327,-0.21195540664804,-0.269869371631611,0.296155694010096,-2.18097898069634,-1.21314663927206,1.49193669881581,1.38969280369493,-0.400680808117106,-1.87282814976479,1.82394870451051,0.637864732838274,-0.141155946382493,0.0699950644281617,1.32568550595165,-0.412599258349398,0.14436832227506,-1.16507785388489,-2.16782049922428,0.24318371493798,0.258954871320764,-0.151966534521183],:scale)
9
+ @y_log=Statsample::Vector.new([0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1],:scale)
10
+ @y_pois=Statsample::Vector.new([1,2,1,3,3,1,10,1,1,2,15,0,0,2,1,2,18,2,1,1,1,8,18,13,7,1,1,0,26,0,2,2,0,0,25,7,0,0,21,0,0,1,5,0,3,0,0,1,0,0],:scale)
11
+ intercept=Statsample::Vector.new([1]*50,:scale)
12
+ @df=Statsample::Dataset.new({"i"=>intercept,"x1"=>x1,"x2"=>x2})
13
+ @glm=Statsample::Regression.glm(@df,@y_log,:binomial)
14
+ end
15
+ should "report correct coefficientes as array" do
16
+ assert_similar_vector(@glm.coefficients,[0.675603176233325,-0.312493754568903,2.28671333346264])
17
+ end
18
+ should "report correct coefficientes as hash" do
19
+ assert_similar_hash(@glm.coefficients(:hash), {"i"=>0.675603176233325,"x1"=>-0.312493754568903,"x2"=>2.28671333346264})
20
+ end
21
+
22
+ end
23
+ end
@@ -0,0 +1,25 @@
1
+ require(File.expand_path(File.dirname(__FILE__)+'/helper.rb'))
2
+
3
+ class StatsampleRegressionGlmPoisson < MiniTest::Unit::TestCase
4
+
5
+ context("Example") do
6
+ setup do
7
+ x1=Statsample::Vector.new([0.537322309644812,-0.717124209978434,-0.519166718891331,0.434970973986765,-0.761822002215759,1.51170030921189,0.883854199811195,-0.908689798854196,1.70331977539793,-0.246971150634099,-1.59077593922623,-0.721548040910253,0.467025703920194,-0.510132788447137,0.430106510266798,-0.144353683251536,-1.54943800728303,0.849307651309298,-0.640304240933579,1.31462478279425,-0.399783455165345,0.0453055645017902,-2.58212161987746,-1.16484414309359,-1.08829266466281,-0.243893919684792,-1.96655661929441,0.301335373291024,-0.665832694463588,-0.0120650855753837,1.5116066367604,0.557300353673344,1.12829931872045,0.234443748015922,-2.03486690662651,0.275544751380246,-0.231465849558696,-0.356880153225012,-0.57746647541923,1.35758352580655,1.23971669378224,-0.662466275100489,0.313263561921793,-1.08783223256362,1.41964722846899,1.29325100940785,0.72153880625103,0.440580131022748,0.0351917814720056, -0.142353224879252],:scale)
8
+ x2=Statsample::Vector.new([-0.866655707911859,-0.367820249977585,0.361486610435,0.857332626245179,0.133438466268095,0.716104533073575,1.77206093023382,-0.10136697295802,-0.777086491435508,-0.204573554913706,0.963353531412233,-1.10103024900542,-0.404372761837392,-0.230226345183469,0.0363730246866971,-0.838265540390497,1.12543549657924,-0.57929175648001,-0.747060244805248,0.58946979365152,-0.531952663697324,1.53338594419818,0.521992029051441,1.41631763288724,0.611402316795129,-0.518355638373296,-0.515192557101107,-0.672697937866108,1.84347042325327,-0.21195540664804,-0.269869371631611,0.296155694010096,-2.18097898069634,-1.21314663927206,1.49193669881581,1.38969280369493,-0.400680808117106,-1.87282814976479,1.82394870451051,0.637864732838274,-0.141155946382493,0.0699950644281617,1.32568550595165,-0.412599258349398,0.14436832227506,-1.16507785388489,-2.16782049922428,0.24318371493798,0.258954871320764,-0.151966534521183],:scale)
9
+ @y_log=Statsample::Vector.new([0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1],:scale)
10
+ @y_pois=Statsample::Vector.new([1,2,1,3,3,1,10,1,1,2,15,0,0,2,1,2,18,2,1,1,1,8,18,13,7,1,1,0,26,0,2,2,0,0,25,7,0,0,21,0,0,1,5,0,3,0,0,1,0,0],:scale)
11
+ intercept=Statsample::Vector.new([1]*50,:scale)
12
+ @df=Statsample::Dataset.new({"i"=>intercept,"x1"=>x1,"x2"=>x2})
13
+ @glm=Statsample::Regression.glm(@df,@y_pois,:poisson)
14
+
15
+ end
16
+ should "report correct coefficientes as array" do
17
+ assert_similar_vector(@glm.coefficients,[0.32993246633711,-0.586359358356708,1.28511323439258])
18
+ end # should
19
+ should "report correct coefficientes as hash" do
20
+ assert_similar_hash(@glm.coefficients(:hash), {"i"=>0.32993246633711,"x1"=>-0.586359358356708, "x2"=>1.28511323439258})
21
+ end # should
22
+
23
+ end # context
24
+ end # class
25
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-statsample-glm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -215,6 +215,8 @@ files:
215
215
  - lib/bio-statsample-glm/regression/poisson.rb
216
216
  - test/helper.rb
217
217
  - test/test_glm.rb
218
+ - test/test_glm_logistic.rb
219
+ - test/test_glm_poisson.rb
218
220
  homepage: http://github.com/AnkurGel/bioruby-statsample-glm
219
221
  licenses:
220
222
  - MIT
@@ -230,7 +232,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
230
232
  version: '0'
231
233
  segments:
232
234
  - 0
233
- hash: 797002845
235
+ hash: -988046223
234
236
  required_rubygems_version: !ruby/object:Gem::Requirement
235
237
  none: false
236
238
  requirements: