bio-statsample-glm 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0
1
+ 0.1.1
@@ -22,14 +22,17 @@ module Statsample
22
22
  #
23
23
  # == Returns
24
24
  # GLM object for given method.
25
- def self.glm(x, y, method=:poisson)
25
+ def self.glm(x, y, method=:gaussian)
26
+
26
27
  if method.downcase.to_sym == :poisson
27
28
  obj = Statsample::Regression::GLM::Poisson.new(x,y)
28
29
  elsif method.downcase.to_sym == :binomial
29
30
  obj = Statsample::Regression::GLM::Logistic.new(x,y)
31
+ else
32
+ raise("Not implemented yet")
30
33
  end
34
+ obj.irwls
31
35
  obj
32
- #now, #irwls method is available to be called on returned obj
33
36
  end
34
37
 
35
38
 
@@ -4,9 +4,6 @@ module Statsample
4
4
 
5
5
  class Logistic
6
6
 
7
- # a named vector of coefficients
8
- attr_reader :coefficients
9
- #
10
7
  attr_reader :se
11
8
  # The fitted mean values
12
9
  attr_reader :fit
@@ -19,11 +16,28 @@ module Statsample
19
16
  # Boolean. Tells whether the IRWLS for the given model converged or not
20
17
  attr_reader :converged
21
18
 
22
- def initialize(x, y)
23
- @x = x
19
+ def initialize(ds, y)
20
+ @ds=ds
21
+ @fields=@ds.fields
22
+ @x = ds.to_matrix
24
23
  @y = y
25
24
  end
26
25
 
26
+ # named vector/hash of coefficients
27
+ # === Parameter
28
+ # * *type*: symbol; (:array, default). Options = [:array, :hash]
29
+ def coefficients(type=:array)
30
+ if type==:array
31
+ #originally returned as vector; so pass it
32
+ @coefficients
33
+ elsif type==:hash
34
+ h={}
35
+ @fields.size.times {|i|
36
+ h[@fields[i]]=@coefficients[i]
37
+ }
38
+ h
39
+ end
40
+ end
27
41
  def self.mu(x, b)
28
42
  matrix_mul = x * b
29
43
  numerator = matrix_mul.map { |y| Math.exp(y) }
@@ -91,4 +105,4 @@ module Statsample
91
105
 
92
106
  end
93
107
  end
94
- end
108
+ end
@@ -4,9 +4,6 @@ module Statsample
4
4
 
5
5
  class Poisson
6
6
 
7
- # a named vector of coefficients
8
- attr_reader :coefficients
9
- #
10
7
  attr_reader :se
11
8
  # The fitted mean values
12
9
  attr_reader :fit
@@ -19,11 +16,28 @@ module Statsample
19
16
  # Boolean. Tells whether the IRWLS for the given model converged or not
20
17
  attr_reader :converged
21
18
 
22
- def initialize(x, y)
23
- @x = x
19
+ def initialize(ds, y)
20
+ @ds=ds
21
+ @fields=@ds.fields
22
+ @x = ds.to_matrix
24
23
  @y = y
25
24
  end
26
25
 
26
+ # named vector/hash of coefficients
27
+ # === Parameter
28
+ # * *type*: symbol; (:array, default). Options = [:array, :hash]
29
+ def coefficients(type=:array)
30
+ if type==:array
31
+ @coefficients
32
+ elsif type==:hash
33
+ h={}
34
+ @fields.size.times {|i|
35
+ h[@fields[i]]=@coefficients[i]
36
+ }
37
+ h
38
+ end
39
+ end
40
+
27
41
  def self.mu(x, b, link=:log)
28
42
  if link.downcase.to_sym == :log
29
43
  (x * b).map { |y| Math.exp(y) }
@@ -42,6 +42,14 @@ module MiniTest
42
42
  assert_in_delta(v,obs[i],delta)
43
43
  }
44
44
  end
45
+ def assert_similar_hash(exp, obs, delta=1e-10,msg=nil)
46
+ msg||="Different hash #{exp} - #{obs}"
47
+ assert_equal(exp.size, obs.size)
48
+ exp.each_key {|k|
49
+ assert_in_delta(exp[k],obs[k],delta)
50
+ }
51
+ end
52
+
45
53
  def assert_equal_vector(exp,obs,delta=1e-10,msg=nil)
46
54
  assert_equal(exp.size, obs.size, "Different size.#{msg}")
47
55
  exp.size.times {|i|
@@ -1,37 +1,4 @@
1
1
  require(File.expand_path(File.dirname(__FILE__)+'/helper.rb'))
2
2
 
3
3
  class StatsampleRegressionGlm < MiniTest::Unit::TestCase
4
-
5
- context("Example") do
6
- setup do
7
- x1=Statsample::Vector.new([0.537322309644812,-0.717124209978434,-0.519166718891331,0.434970973986765,-0.761822002215759,1.51170030921189,0.883854199811195,-0.908689798854196,1.70331977539793,-0.246971150634099,-1.59077593922623,-0.721548040910253,0.467025703920194,-0.510132788447137,0.430106510266798,-0.144353683251536,-1.54943800728303,0.849307651309298,-0.640304240933579,1.31462478279425,-0.399783455165345,0.0453055645017902,-2.58212161987746,-1.16484414309359,-1.08829266466281,-0.243893919684792,-1.96655661929441,0.301335373291024,-0.665832694463588,-0.0120650855753837,1.5116066367604,0.557300353673344,1.12829931872045,0.234443748015922,-2.03486690662651,0.275544751380246,-0.231465849558696,-0.356880153225012,-0.57746647541923,1.35758352580655,1.23971669378224,-0.662466275100489,0.313263561921793,-1.08783223256362,1.41964722846899,1.29325100940785,0.72153880625103,0.440580131022748,0.0351917814720056, -0.142353224879252],:scale)
8
- x2=Statsample::Vector.new([-0.866655707911859,-0.367820249977585,0.361486610435,0.857332626245179,0.133438466268095,0.716104533073575,1.77206093023382,-0.10136697295802,-0.777086491435508,-0.204573554913706,0.963353531412233,-1.10103024900542,-0.404372761837392,-0.230226345183469,0.0363730246866971,-0.838265540390497,1.12543549657924,-0.57929175648001,-0.747060244805248,0.58946979365152,-0.531952663697324,1.53338594419818,0.521992029051441,1.41631763288724,0.611402316795129,-0.518355638373296,-0.515192557101107,-0.672697937866108,1.84347042325327,-0.21195540664804,-0.269869371631611,0.296155694010096,-2.18097898069634,-1.21314663927206,1.49193669881581,1.38969280369493,-0.400680808117106,-1.87282814976479,1.82394870451051,0.637864732838274,-0.141155946382493,0.0699950644281617,1.32568550595165,-0.412599258349398,0.14436832227506,-1.16507785388489,-2.16782049922428,0.24318371493798,0.258954871320764,-0.151966534521183],:scale)
9
- @y_log=Statsample::Vector.new([0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1],:scale)
10
- @y_pois=Statsample::Vector.new([1,2,1,3,3,1,10,1,1,2,15,0,0,2,1,2,18,2,1,1,1,8,18,13,7,1,1,0,26,0,2,2,0,0,25,7,0,0,21,0,0,1,5,0,3,0,0,1,0,0],:scale)
11
- intercept=Statsample::Vector.new([1]*50,:scale)
12
- @df=Statsample::Dataset.new({"i"=>intercept,"x1"=>x1,"x2"=>x2})
13
- end
14
-
15
- context("Logistic") do
16
- setup do
17
- @glm=Statsample::Regression.glm(@df.to_matrix,@y_log,:binomial)
18
- @glm.irwls
19
- end
20
-
21
- should "report correct coefficientes" do
22
- assert_similar_vector(@glm.coefficients,[0.675603176233325,-0.312493754568903,2.28671333346264])
23
- end
24
- end
25
-
26
- context("Poisson") do
27
- setup do
28
- @glm=Statsample::Regression.glm(@df.to_matrix,@y_pois,:poisson)
29
- @glm.irwls
30
- end
31
-
32
- should "report correct coefficientes" do
33
- assert_similar_vector(@glm.coefficients,[0.32993246633711,-0.586359358356708,1.28511323439258])
34
- end
35
- end
36
- end
37
4
  end
@@ -0,0 +1,23 @@
1
+ require(File.expand_path(File.dirname(__FILE__)+'/helper.rb'))
2
+
3
+ class StatsampleRegressionGlmLogistic < MiniTest::Unit::TestCase
4
+
5
+ context("Example") do
6
+ setup do
7
+ x1=Statsample::Vector.new([0.537322309644812,-0.717124209978434,-0.519166718891331,0.434970973986765,-0.761822002215759,1.51170030921189,0.883854199811195,-0.908689798854196,1.70331977539793,-0.246971150634099,-1.59077593922623,-0.721548040910253,0.467025703920194,-0.510132788447137,0.430106510266798,-0.144353683251536,-1.54943800728303,0.849307651309298,-0.640304240933579,1.31462478279425,-0.399783455165345,0.0453055645017902,-2.58212161987746,-1.16484414309359,-1.08829266466281,-0.243893919684792,-1.96655661929441,0.301335373291024,-0.665832694463588,-0.0120650855753837,1.5116066367604,0.557300353673344,1.12829931872045,0.234443748015922,-2.03486690662651,0.275544751380246,-0.231465849558696,-0.356880153225012,-0.57746647541923,1.35758352580655,1.23971669378224,-0.662466275100489,0.313263561921793,-1.08783223256362,1.41964722846899,1.29325100940785,0.72153880625103,0.440580131022748,0.0351917814720056, -0.142353224879252],:scale)
8
+ x2=Statsample::Vector.new([-0.866655707911859,-0.367820249977585,0.361486610435,0.857332626245179,0.133438466268095,0.716104533073575,1.77206093023382,-0.10136697295802,-0.777086491435508,-0.204573554913706,0.963353531412233,-1.10103024900542,-0.404372761837392,-0.230226345183469,0.0363730246866971,-0.838265540390497,1.12543549657924,-0.57929175648001,-0.747060244805248,0.58946979365152,-0.531952663697324,1.53338594419818,0.521992029051441,1.41631763288724,0.611402316795129,-0.518355638373296,-0.515192557101107,-0.672697937866108,1.84347042325327,-0.21195540664804,-0.269869371631611,0.296155694010096,-2.18097898069634,-1.21314663927206,1.49193669881581,1.38969280369493,-0.400680808117106,-1.87282814976479,1.82394870451051,0.637864732838274,-0.141155946382493,0.0699950644281617,1.32568550595165,-0.412599258349398,0.14436832227506,-1.16507785388489,-2.16782049922428,0.24318371493798,0.258954871320764,-0.151966534521183],:scale)
9
+ @y_log=Statsample::Vector.new([0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1],:scale)
10
+ @y_pois=Statsample::Vector.new([1,2,1,3,3,1,10,1,1,2,15,0,0,2,1,2,18,2,1,1,1,8,18,13,7,1,1,0,26,0,2,2,0,0,25,7,0,0,21,0,0,1,5,0,3,0,0,1,0,0],:scale)
11
+ intercept=Statsample::Vector.new([1]*50,:scale)
12
+ @df=Statsample::Dataset.new({"i"=>intercept,"x1"=>x1,"x2"=>x2})
13
+ @glm=Statsample::Regression.glm(@df,@y_log,:binomial)
14
+ end
15
+ should "report correct coefficientes as array" do
16
+ assert_similar_vector(@glm.coefficients,[0.675603176233325,-0.312493754568903,2.28671333346264])
17
+ end
18
+ should "report correct coefficientes as hash" do
19
+ assert_similar_hash(@glm.coefficients(:hash), {"i"=>0.675603176233325,"x1"=>-0.312493754568903,"x2"=>2.28671333346264})
20
+ end
21
+
22
+ end
23
+ end
@@ -0,0 +1,25 @@
1
+ require(File.expand_path(File.dirname(__FILE__)+'/helper.rb'))
2
+
3
+ class StatsampleRegressionGlmPoisson < MiniTest::Unit::TestCase
4
+
5
+ context("Example") do
6
+ setup do
7
+ x1=Statsample::Vector.new([0.537322309644812,-0.717124209978434,-0.519166718891331,0.434970973986765,-0.761822002215759,1.51170030921189,0.883854199811195,-0.908689798854196,1.70331977539793,-0.246971150634099,-1.59077593922623,-0.721548040910253,0.467025703920194,-0.510132788447137,0.430106510266798,-0.144353683251536,-1.54943800728303,0.849307651309298,-0.640304240933579,1.31462478279425,-0.399783455165345,0.0453055645017902,-2.58212161987746,-1.16484414309359,-1.08829266466281,-0.243893919684792,-1.96655661929441,0.301335373291024,-0.665832694463588,-0.0120650855753837,1.5116066367604,0.557300353673344,1.12829931872045,0.234443748015922,-2.03486690662651,0.275544751380246,-0.231465849558696,-0.356880153225012,-0.57746647541923,1.35758352580655,1.23971669378224,-0.662466275100489,0.313263561921793,-1.08783223256362,1.41964722846899,1.29325100940785,0.72153880625103,0.440580131022748,0.0351917814720056, -0.142353224879252],:scale)
8
+ x2=Statsample::Vector.new([-0.866655707911859,-0.367820249977585,0.361486610435,0.857332626245179,0.133438466268095,0.716104533073575,1.77206093023382,-0.10136697295802,-0.777086491435508,-0.204573554913706,0.963353531412233,-1.10103024900542,-0.404372761837392,-0.230226345183469,0.0363730246866971,-0.838265540390497,1.12543549657924,-0.57929175648001,-0.747060244805248,0.58946979365152,-0.531952663697324,1.53338594419818,0.521992029051441,1.41631763288724,0.611402316795129,-0.518355638373296,-0.515192557101107,-0.672697937866108,1.84347042325327,-0.21195540664804,-0.269869371631611,0.296155694010096,-2.18097898069634,-1.21314663927206,1.49193669881581,1.38969280369493,-0.400680808117106,-1.87282814976479,1.82394870451051,0.637864732838274,-0.141155946382493,0.0699950644281617,1.32568550595165,-0.412599258349398,0.14436832227506,-1.16507785388489,-2.16782049922428,0.24318371493798,0.258954871320764,-0.151966534521183],:scale)
9
+ @y_log=Statsample::Vector.new([0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1],:scale)
10
+ @y_pois=Statsample::Vector.new([1,2,1,3,3,1,10,1,1,2,15,0,0,2,1,2,18,2,1,1,1,8,18,13,7,1,1,0,26,0,2,2,0,0,25,7,0,0,21,0,0,1,5,0,3,0,0,1,0,0],:scale)
11
+ intercept=Statsample::Vector.new([1]*50,:scale)
12
+ @df=Statsample::Dataset.new({"i"=>intercept,"x1"=>x1,"x2"=>x2})
13
+ @glm=Statsample::Regression.glm(@df,@y_pois,:poisson)
14
+
15
+ end
16
+ should "report correct coefficientes as array" do
17
+ assert_similar_vector(@glm.coefficients,[0.32993246633711,-0.586359358356708,1.28511323439258])
18
+ end # should
19
+ should "report correct coefficientes as hash" do
20
+ assert_similar_hash(@glm.coefficients(:hash), {"i"=>0.32993246633711,"x1"=>-0.586359358356708, "x2"=>1.28511323439258})
21
+ end # should
22
+
23
+ end # context
24
+ end # class
25
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-statsample-glm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -215,6 +215,8 @@ files:
215
215
  - lib/bio-statsample-glm/regression/poisson.rb
216
216
  - test/helper.rb
217
217
  - test/test_glm.rb
218
+ - test/test_glm_logistic.rb
219
+ - test/test_glm_poisson.rb
218
220
  homepage: http://github.com/AnkurGel/bioruby-statsample-glm
219
221
  licenses:
220
222
  - MIT
@@ -230,7 +232,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
230
232
  version: '0'
231
233
  segments:
232
234
  - 0
233
- hash: 797002845
235
+ hash: -988046223
234
236
  required_rubygems_version: !ruby/object:Gem::Requirement
235
237
  none: false
236
238
  requirements: