bio-statsample-glm 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/bio-statsample-glm/regression.rb +5 -2
- data/lib/bio-statsample-glm/regression/logistic.rb +20 -6
- data/lib/bio-statsample-glm/regression/poisson.rb +19 -5
- data/test/helper.rb +8 -0
- data/test/test_glm.rb +0 -33
- data/test/test_glm_logistic.rb +23 -0
- data/test/test_glm_poisson.rb +25 -0
- metadata +4 -2
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.
|
1
|
+
0.1.1
|
@@ -22,14 +22,17 @@ module Statsample
|
|
22
22
|
#
|
23
23
|
# == Returns
|
24
24
|
# GLM object for given method.
|
25
|
-
def self.glm(x, y, method=:
|
25
|
+
def self.glm(x, y, method=:gaussian)
|
26
|
+
|
26
27
|
if method.downcase.to_sym == :poisson
|
27
28
|
obj = Statsample::Regression::GLM::Poisson.new(x,y)
|
28
29
|
elsif method.downcase.to_sym == :binomial
|
29
30
|
obj = Statsample::Regression::GLM::Logistic.new(x,y)
|
31
|
+
else
|
32
|
+
raise("Not implemented yet")
|
30
33
|
end
|
34
|
+
obj.irwls
|
31
35
|
obj
|
32
|
-
#now, #irwls method is available to be called on returned obj
|
33
36
|
end
|
34
37
|
|
35
38
|
|
@@ -4,9 +4,6 @@ module Statsample
|
|
4
4
|
|
5
5
|
class Logistic
|
6
6
|
|
7
|
-
# a named vector of coefficients
|
8
|
-
attr_reader :coefficients
|
9
|
-
#
|
10
7
|
attr_reader :se
|
11
8
|
# The fitted mean values
|
12
9
|
attr_reader :fit
|
@@ -19,11 +16,28 @@ module Statsample
|
|
19
16
|
# Boolean. Tells whether the IRWLS for the given model converged or not
|
20
17
|
attr_reader :converged
|
21
18
|
|
22
|
-
def initialize(
|
23
|
-
@
|
19
|
+
def initialize(ds, y)
|
20
|
+
@ds=ds
|
21
|
+
@fields=@ds.fields
|
22
|
+
@x = ds.to_matrix
|
24
23
|
@y = y
|
25
24
|
end
|
26
25
|
|
26
|
+
# named vector/hash of coefficients
|
27
|
+
# === Parameter
|
28
|
+
# * *type*: symbol; (:array, default). Options = [:array, :hash]
|
29
|
+
def coefficients(type=:array)
|
30
|
+
if type==:array
|
31
|
+
#originally returned as vector; so pass it
|
32
|
+
@coefficients
|
33
|
+
elsif type==:hash
|
34
|
+
h={}
|
35
|
+
@fields.size.times {|i|
|
36
|
+
h[@fields[i]]=@coefficients[i]
|
37
|
+
}
|
38
|
+
h
|
39
|
+
end
|
40
|
+
end
|
27
41
|
def self.mu(x, b)
|
28
42
|
matrix_mul = x * b
|
29
43
|
numerator = matrix_mul.map { |y| Math.exp(y) }
|
@@ -91,4 +105,4 @@ module Statsample
|
|
91
105
|
|
92
106
|
end
|
93
107
|
end
|
94
|
-
end
|
108
|
+
end
|
@@ -4,9 +4,6 @@ module Statsample
|
|
4
4
|
|
5
5
|
class Poisson
|
6
6
|
|
7
|
-
# a named vector of coefficients
|
8
|
-
attr_reader :coefficients
|
9
|
-
#
|
10
7
|
attr_reader :se
|
11
8
|
# The fitted mean values
|
12
9
|
attr_reader :fit
|
@@ -19,11 +16,28 @@ module Statsample
|
|
19
16
|
# Boolean. Tells whether the IRWLS for the given model converged or not
|
20
17
|
attr_reader :converged
|
21
18
|
|
22
|
-
def initialize(
|
23
|
-
@
|
19
|
+
def initialize(ds, y)
|
20
|
+
@ds=ds
|
21
|
+
@fields=@ds.fields
|
22
|
+
@x = ds.to_matrix
|
24
23
|
@y = y
|
25
24
|
end
|
26
25
|
|
26
|
+
# named vector/hash of coefficients
|
27
|
+
# === Parameter
|
28
|
+
# * *type*: symbol; (:array, default). Options = [:array, :hash]
|
29
|
+
def coefficients(type=:array)
|
30
|
+
if type==:array
|
31
|
+
@coefficients
|
32
|
+
elsif type==:hash
|
33
|
+
h={}
|
34
|
+
@fields.size.times {|i|
|
35
|
+
h[@fields[i]]=@coefficients[i]
|
36
|
+
}
|
37
|
+
h
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
27
41
|
def self.mu(x, b, link=:log)
|
28
42
|
if link.downcase.to_sym == :log
|
29
43
|
(x * b).map { |y| Math.exp(y) }
|
data/test/helper.rb
CHANGED
@@ -42,6 +42,14 @@ module MiniTest
|
|
42
42
|
assert_in_delta(v,obs[i],delta)
|
43
43
|
}
|
44
44
|
end
|
45
|
+
def assert_similar_hash(exp, obs, delta=1e-10,msg=nil)
|
46
|
+
msg||="Different hash #{exp} - #{obs}"
|
47
|
+
assert_equal(exp.size, obs.size)
|
48
|
+
exp.each_key {|k|
|
49
|
+
assert_in_delta(exp[k],obs[k],delta)
|
50
|
+
}
|
51
|
+
end
|
52
|
+
|
45
53
|
def assert_equal_vector(exp,obs,delta=1e-10,msg=nil)
|
46
54
|
assert_equal(exp.size, obs.size, "Different size.#{msg}")
|
47
55
|
exp.size.times {|i|
|
data/test/test_glm.rb
CHANGED
@@ -1,37 +1,4 @@
|
|
1
1
|
require(File.expand_path(File.dirname(__FILE__)+'/helper.rb'))
|
2
2
|
|
3
3
|
class StatsampleRegressionGlm < MiniTest::Unit::TestCase
|
4
|
-
|
5
|
-
context("Example") do
|
6
|
-
setup do
|
7
|
-
x1=Statsample::Vector.new([0.537322309644812,-0.717124209978434,-0.519166718891331,0.434970973986765,-0.761822002215759,1.51170030921189,0.883854199811195,-0.908689798854196,1.70331977539793,-0.246971150634099,-1.59077593922623,-0.721548040910253,0.467025703920194,-0.510132788447137,0.430106510266798,-0.144353683251536,-1.54943800728303,0.849307651309298,-0.640304240933579,1.31462478279425,-0.399783455165345,0.0453055645017902,-2.58212161987746,-1.16484414309359,-1.08829266466281,-0.243893919684792,-1.96655661929441,0.301335373291024,-0.665832694463588,-0.0120650855753837,1.5116066367604,0.557300353673344,1.12829931872045,0.234443748015922,-2.03486690662651,0.275544751380246,-0.231465849558696,-0.356880153225012,-0.57746647541923,1.35758352580655,1.23971669378224,-0.662466275100489,0.313263561921793,-1.08783223256362,1.41964722846899,1.29325100940785,0.72153880625103,0.440580131022748,0.0351917814720056, -0.142353224879252],:scale)
|
8
|
-
x2=Statsample::Vector.new([-0.866655707911859,-0.367820249977585,0.361486610435,0.857332626245179,0.133438466268095,0.716104533073575,1.77206093023382,-0.10136697295802,-0.777086491435508,-0.204573554913706,0.963353531412233,-1.10103024900542,-0.404372761837392,-0.230226345183469,0.0363730246866971,-0.838265540390497,1.12543549657924,-0.57929175648001,-0.747060244805248,0.58946979365152,-0.531952663697324,1.53338594419818,0.521992029051441,1.41631763288724,0.611402316795129,-0.518355638373296,-0.515192557101107,-0.672697937866108,1.84347042325327,-0.21195540664804,-0.269869371631611,0.296155694010096,-2.18097898069634,-1.21314663927206,1.49193669881581,1.38969280369493,-0.400680808117106,-1.87282814976479,1.82394870451051,0.637864732838274,-0.141155946382493,0.0699950644281617,1.32568550595165,-0.412599258349398,0.14436832227506,-1.16507785388489,-2.16782049922428,0.24318371493798,0.258954871320764,-0.151966534521183],:scale)
|
9
|
-
@y_log=Statsample::Vector.new([0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1],:scale)
|
10
|
-
@y_pois=Statsample::Vector.new([1,2,1,3,3,1,10,1,1,2,15,0,0,2,1,2,18,2,1,1,1,8,18,13,7,1,1,0,26,0,2,2,0,0,25,7,0,0,21,0,0,1,5,0,3,0,0,1,0,0],:scale)
|
11
|
-
intercept=Statsample::Vector.new([1]*50,:scale)
|
12
|
-
@df=Statsample::Dataset.new({"i"=>intercept,"x1"=>x1,"x2"=>x2})
|
13
|
-
end
|
14
|
-
|
15
|
-
context("Logistic") do
|
16
|
-
setup do
|
17
|
-
@glm=Statsample::Regression.glm(@df.to_matrix,@y_log,:binomial)
|
18
|
-
@glm.irwls
|
19
|
-
end
|
20
|
-
|
21
|
-
should "report correct coefficientes" do
|
22
|
-
assert_similar_vector(@glm.coefficients,[0.675603176233325,-0.312493754568903,2.28671333346264])
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
26
|
-
context("Poisson") do
|
27
|
-
setup do
|
28
|
-
@glm=Statsample::Regression.glm(@df.to_matrix,@y_pois,:poisson)
|
29
|
-
@glm.irwls
|
30
|
-
end
|
31
|
-
|
32
|
-
should "report correct coefficientes" do
|
33
|
-
assert_similar_vector(@glm.coefficients,[0.32993246633711,-0.586359358356708,1.28511323439258])
|
34
|
-
end
|
35
|
-
end
|
36
|
-
end
|
37
4
|
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require(File.expand_path(File.dirname(__FILE__)+'/helper.rb'))
|
2
|
+
|
3
|
+
class StatsampleRegressionGlmLogistic < MiniTest::Unit::TestCase
|
4
|
+
|
5
|
+
context("Example") do
|
6
|
+
setup do
|
7
|
+
x1=Statsample::Vector.new([0.537322309644812,-0.717124209978434,-0.519166718891331,0.434970973986765,-0.761822002215759,1.51170030921189,0.883854199811195,-0.908689798854196,1.70331977539793,-0.246971150634099,-1.59077593922623,-0.721548040910253,0.467025703920194,-0.510132788447137,0.430106510266798,-0.144353683251536,-1.54943800728303,0.849307651309298,-0.640304240933579,1.31462478279425,-0.399783455165345,0.0453055645017902,-2.58212161987746,-1.16484414309359,-1.08829266466281,-0.243893919684792,-1.96655661929441,0.301335373291024,-0.665832694463588,-0.0120650855753837,1.5116066367604,0.557300353673344,1.12829931872045,0.234443748015922,-2.03486690662651,0.275544751380246,-0.231465849558696,-0.356880153225012,-0.57746647541923,1.35758352580655,1.23971669378224,-0.662466275100489,0.313263561921793,-1.08783223256362,1.41964722846899,1.29325100940785,0.72153880625103,0.440580131022748,0.0351917814720056, -0.142353224879252],:scale)
|
8
|
+
x2=Statsample::Vector.new([-0.866655707911859,-0.367820249977585,0.361486610435,0.857332626245179,0.133438466268095,0.716104533073575,1.77206093023382,-0.10136697295802,-0.777086491435508,-0.204573554913706,0.963353531412233,-1.10103024900542,-0.404372761837392,-0.230226345183469,0.0363730246866971,-0.838265540390497,1.12543549657924,-0.57929175648001,-0.747060244805248,0.58946979365152,-0.531952663697324,1.53338594419818,0.521992029051441,1.41631763288724,0.611402316795129,-0.518355638373296,-0.515192557101107,-0.672697937866108,1.84347042325327,-0.21195540664804,-0.269869371631611,0.296155694010096,-2.18097898069634,-1.21314663927206,1.49193669881581,1.38969280369493,-0.400680808117106,-1.87282814976479,1.82394870451051,0.637864732838274,-0.141155946382493,0.0699950644281617,1.32568550595165,-0.412599258349398,0.14436832227506,-1.16507785388489,-2.16782049922428,0.24318371493798,0.258954871320764,-0.151966534521183],:scale)
|
9
|
+
@y_log=Statsample::Vector.new([0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1],:scale)
|
10
|
+
@y_pois=Statsample::Vector.new([1,2,1,3,3,1,10,1,1,2,15,0,0,2,1,2,18,2,1,1,1,8,18,13,7,1,1,0,26,0,2,2,0,0,25,7,0,0,21,0,0,1,5,0,3,0,0,1,0,0],:scale)
|
11
|
+
intercept=Statsample::Vector.new([1]*50,:scale)
|
12
|
+
@df=Statsample::Dataset.new({"i"=>intercept,"x1"=>x1,"x2"=>x2})
|
13
|
+
@glm=Statsample::Regression.glm(@df,@y_log,:binomial)
|
14
|
+
end
|
15
|
+
should "report correct coefficientes as array" do
|
16
|
+
assert_similar_vector(@glm.coefficients,[0.675603176233325,-0.312493754568903,2.28671333346264])
|
17
|
+
end
|
18
|
+
should "report correct coefficientes as hash" do
|
19
|
+
assert_similar_hash(@glm.coefficients(:hash), {"i"=>0.675603176233325,"x1"=>-0.312493754568903,"x2"=>2.28671333346264})
|
20
|
+
end
|
21
|
+
|
22
|
+
end
|
23
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require(File.expand_path(File.dirname(__FILE__)+'/helper.rb'))
|
2
|
+
|
3
|
+
class StatsampleRegressionGlmPoisson < MiniTest::Unit::TestCase
|
4
|
+
|
5
|
+
context("Example") do
|
6
|
+
setup do
|
7
|
+
x1=Statsample::Vector.new([0.537322309644812,-0.717124209978434,-0.519166718891331,0.434970973986765,-0.761822002215759,1.51170030921189,0.883854199811195,-0.908689798854196,1.70331977539793,-0.246971150634099,-1.59077593922623,-0.721548040910253,0.467025703920194,-0.510132788447137,0.430106510266798,-0.144353683251536,-1.54943800728303,0.849307651309298,-0.640304240933579,1.31462478279425,-0.399783455165345,0.0453055645017902,-2.58212161987746,-1.16484414309359,-1.08829266466281,-0.243893919684792,-1.96655661929441,0.301335373291024,-0.665832694463588,-0.0120650855753837,1.5116066367604,0.557300353673344,1.12829931872045,0.234443748015922,-2.03486690662651,0.275544751380246,-0.231465849558696,-0.356880153225012,-0.57746647541923,1.35758352580655,1.23971669378224,-0.662466275100489,0.313263561921793,-1.08783223256362,1.41964722846899,1.29325100940785,0.72153880625103,0.440580131022748,0.0351917814720056, -0.142353224879252],:scale)
|
8
|
+
x2=Statsample::Vector.new([-0.866655707911859,-0.367820249977585,0.361486610435,0.857332626245179,0.133438466268095,0.716104533073575,1.77206093023382,-0.10136697295802,-0.777086491435508,-0.204573554913706,0.963353531412233,-1.10103024900542,-0.404372761837392,-0.230226345183469,0.0363730246866971,-0.838265540390497,1.12543549657924,-0.57929175648001,-0.747060244805248,0.58946979365152,-0.531952663697324,1.53338594419818,0.521992029051441,1.41631763288724,0.611402316795129,-0.518355638373296,-0.515192557101107,-0.672697937866108,1.84347042325327,-0.21195540664804,-0.269869371631611,0.296155694010096,-2.18097898069634,-1.21314663927206,1.49193669881581,1.38969280369493,-0.400680808117106,-1.87282814976479,1.82394870451051,0.637864732838274,-0.141155946382493,0.0699950644281617,1.32568550595165,-0.412599258349398,0.14436832227506,-1.16507785388489,-2.16782049922428,0.24318371493798,0.258954871320764,-0.151966534521183],:scale)
|
9
|
+
@y_log=Statsample::Vector.new([0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1],:scale)
|
10
|
+
@y_pois=Statsample::Vector.new([1,2,1,3,3,1,10,1,1,2,15,0,0,2,1,2,18,2,1,1,1,8,18,13,7,1,1,0,26,0,2,2,0,0,25,7,0,0,21,0,0,1,5,0,3,0,0,1,0,0],:scale)
|
11
|
+
intercept=Statsample::Vector.new([1]*50,:scale)
|
12
|
+
@df=Statsample::Dataset.new({"i"=>intercept,"x1"=>x1,"x2"=>x2})
|
13
|
+
@glm=Statsample::Regression.glm(@df,@y_pois,:poisson)
|
14
|
+
|
15
|
+
end
|
16
|
+
should "report correct coefficientes as array" do
|
17
|
+
assert_similar_vector(@glm.coefficients,[0.32993246633711,-0.586359358356708,1.28511323439258])
|
18
|
+
end # should
|
19
|
+
should "report correct coefficientes as hash" do
|
20
|
+
assert_similar_hash(@glm.coefficients(:hash), {"i"=>0.32993246633711,"x1"=>-0.586359358356708, "x2"=>1.28511323439258})
|
21
|
+
end # should
|
22
|
+
|
23
|
+
end # context
|
24
|
+
end # class
|
25
|
+
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-statsample-glm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -215,6 +215,8 @@ files:
|
|
215
215
|
- lib/bio-statsample-glm/regression/poisson.rb
|
216
216
|
- test/helper.rb
|
217
217
|
- test/test_glm.rb
|
218
|
+
- test/test_glm_logistic.rb
|
219
|
+
- test/test_glm_poisson.rb
|
218
220
|
homepage: http://github.com/AnkurGel/bioruby-statsample-glm
|
219
221
|
licenses:
|
220
222
|
- MIT
|
@@ -230,7 +232,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
230
232
|
version: '0'
|
231
233
|
segments:
|
232
234
|
- 0
|
233
|
-
hash:
|
235
|
+
hash: -988046223
|
234
236
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
235
237
|
none: false
|
236
238
|
requirements:
|