statsample-glm 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43)
  1. checksums.yaml +7 -0
  2. data/.gitignore +51 -0
  3. data/.rspec +1 -0
  4. data/.travis.yml +2 -9
  5. data/Gemfile +2 -20
  6. data/LICENSE.txt +1 -1
  7. data/README.rdoc +14 -11
  8. data/Rakefile +16 -24
  9. data/lib/statsample-glm.rb +1 -11
  10. data/lib/statsample-glm/{regression.rb → glm.rb} +13 -46
  11. data/lib/statsample-glm/glm/base.rb +99 -0
  12. data/lib/statsample-glm/glm/irls/base.rb +54 -0
  13. data/lib/statsample-glm/glm/irls/logistic.rb +46 -0
  14. data/lib/statsample-glm/glm/irls/poisson.rb +48 -0
  15. data/lib/statsample-glm/glm/logistic.rb +16 -0
  16. data/lib/statsample-glm/glm/mle/base.rb +157 -0
  17. data/lib/statsample-glm/glm/mle/logistic.rb +113 -0
  18. data/lib/statsample-glm/glm/mle/normal.rb +94 -0
  19. data/lib/statsample-glm/glm/mle/probit.rb +100 -0
  20. data/lib/statsample-glm/glm/normal.rb +17 -0
  21. data/lib/statsample-glm/glm/poisson.rb +17 -0
  22. data/lib/statsample-glm/glm/probit.rb +16 -0
  23. data/lib/statsample-glm/version.rb +5 -0
  24. data/spec/data/logistic.csv +51 -0
  25. data/spec/data/logistic_mle.csv +201 -0
  26. data/spec/data/normal.csv +30 -0
  27. data/spec/logistic_spec.rb +37 -0
  28. data/spec/normal_spec.rb +15 -0
  29. data/spec/poisson_spec.rb +32 -0
  30. data/spec/probit_spec.rb +19 -0
  31. data/spec/spec_helper.rb +50 -0
  32. data/statsample-glm.gemspec +35 -0
  33. metadata +71 -145
  34. data/VERSION +0 -1
  35. data/features/bio-statsample-glm.feature +0 -9
  36. data/features/step_definitions/bio-statsample-glm_steps.rb +0 -0
  37. data/features/support/env.rb +0 -15
  38. data/lib/statsample-glm/regression/logistic.rb +0 -108
  39. data/lib/statsample-glm/regression/poisson.rb +0 -90
  40. data/test/helper.rb +0 -87
  41. data/test/test_glm.rb +0 -4
  42. data/test/test_glm_logistic.rb +0 -23
  43. data/test/test_glm_poisson.rb +0 -25
data/VERSION DELETED
@@ -1 +0,0 @@
1
- 0.0.1
data/features/bio-statsample-glm.feature DELETED
@@ -1,9 +0,0 @@
1
- Feature: something something
2
- In order to something something
3
- A user something something
4
- something something something
5
-
6
- Scenario: something something
7
- Given inspiration
8
- When I create a sweet new gem
9
- Then everyone should see how awesome I am
data/features/support/env.rb DELETED
@@ -1,15 +0,0 @@
1
- require 'bundler'
2
- begin
3
- Bundler.setup(:default, :development)
4
- rescue Bundler::BundlerError => e
5
- $stderr.puts e.message
6
- $stderr.puts "Run `bundle install` to install missing gems"
7
- exit e.status_code
8
- end
9
-
10
- $LOAD_PATH.unshift(File.dirname(__FILE__) + '/../../lib')
11
- require 'statsample-glm'
12
-
13
- require 'test/unit/assertions'
14
-
15
- World(Test::Unit::Assertions)
data/lib/statsample-glm/regression/logistic.rb DELETED
@@ -1,108 +0,0 @@
1
- module Statsample
2
- module Regression
3
- module GLM
4
-
5
- class Logistic
6
-
7
- attr_reader :se
8
- # The fitted mean values
9
- attr_reader :fit
10
- # the _working_ residuals; that is the residuals in the final iteration of the IRWLS fit.
11
- attr_reader :residuals
12
- # The residuals degree of freedom
13
- attr_reader :df
14
- # Number of iterations used for convergence
15
- attr_reader :iter
16
- # Boolean. Tells whether the IRWLS for the given model converged or not
17
- attr_reader :converged
18
-
19
- def initialize(ds, y)
20
- @ds=ds
21
- @fields=@ds.fields
22
- @x = ds.to_matrix
23
- @y = y
24
- end
25
-
26
- # named vector/hash of coefficients
27
- # === Parameter
28
- # * *type*: symbol; (:array, default). Options = [:array, :hash]
29
- def coefficients(type=:array)
30
- if type==:array
31
- #originally returned as vector; so pass it
32
- @coefficients
33
- elsif type==:hash
34
- h={}
35
- @fields.size.times {|i|
36
- h[@fields[i]]=@coefficients[i]
37
- }
38
- h
39
- end
40
- end
41
- def self.mu(x, b)
42
- matrix_mul = x * b
43
- numerator = matrix_mul.map { |y| Math.exp(y) }
44
- denominator = numerator.map { |y| 1 + y }
45
-
46
- numerator.each_with_index { |e, r, c|
47
- numerator[r,c] = numerator[r,c].to_f / denominator[r,c].to_f
48
- }
49
- end
50
-
51
- def self.w(x, b)
52
- mus = mu(x,b).column_vectors.map(&:to_a).flatten
53
- mus_intermediate = mus.collect { |x| 1 - x }
54
- w = mus.zip(mus_intermediate).collect { |x| x.inject(:*) }
55
- w_mat = Matrix.I(w.size)
56
- w_enum = w.to_enum
57
- return w_mat.map do |x|
58
- x.eql?(1) ? w_enum.next : x
59
- end
60
- end
61
-
62
- def self.h(x,b,y)
63
- x_t = x.transpose
64
- mu_flat = mu(x,b).column_vectors.map(&:to_a).flatten
65
- column_data = y.zip(mu_flat).collect { |x| x.inject(:-) }
66
- x_t * Matrix.column_vector(column_data)
67
- end
68
-
69
- def self.j(x,b)
70
- w_matrix = w(x, b)
71
- jacobian_matrix = x.transpose * w_matrix * x
72
- jacobian_matrix.map { |x| -x }
73
- end
74
-
75
- def to_s
76
- sprintf("Logistic Regression (Statsample::Regression::GLM;:Logistic)")
77
- end
78
-
79
- # = Iteratively reweighted least squares
80
- # Computes irwls for given model and parameters.
81
- #
82
- # == Usage
83
- # require 'statsample-glm'
84
- # x1=Statsample::Vector.new([0.537322309644812,-0.717124209978434,-0.519166718891331,0.434970973986765,-0.761822002215759,1.51170030921189,0.883854199811195,-0.908689798854196,1.70331977539793,-0.246971150634099,-1.59077593922623,-0.721548040910253,0.467025703920194,-0.510132788447137,0.430106510266798,-0.144353683251536,-1.54943800728303,0.849307651309298,-0.640304240933579,1.31462478279425,-0.399783455165345,0.0453055645017902,-2.58212161987746,-1.16484414309359,-1.08829266466281,-0.243893919684792,-1.96655661929441,0.301335373291024,-0.665832694463588,-0.0120650855753837,1.5116066367604,0.557300353673344,1.12829931872045,0.234443748015922,-2.03486690662651,0.275544751380246,-0.231465849558696,-0.356880153225012,-0.57746647541923,1.35758352580655,1.23971669378224,-0.662466275100489,0.313263561921793,-1.08783223256362,1.41964722846899,1.29325100940785,0.72153880625103,0.440580131022748,0.0351917814720056, -0.142353224879252],:scale)
85
- # x2=Statsample::Vector.new([-0.866655707911859,-0.367820249977585,0.361486610435,0.857332626245179,0.133438466268095,0.716104533073575,1.77206093023382,-0.10136697295802,-0.777086491435508,-0.204573554913706,0.963353531412233,-1.10103024900542,-0.404372761837392,-0.230226345183469,0.0363730246866971,-0.838265540390497,1.12543549657924,-0.57929175648001,-0.747060244805248,0.58946979365152,-0.531952663697324,1.53338594419818,0.521992029051441,1.41631763288724,0.611402316795129,-0.518355638373296,-0.515192557101107,-0.672697937866108,1.84347042325327,-0.21195540664804,-0.269869371631611,0.296155694010096,-2.18097898069634,-1.21314663927206,1.49193669881581,1.38969280369493,-0.400680808117106,-1.87282814976479,1.82394870451051,0.637864732838274,-0.141155946382493,0.0699950644281617,1.32568550595165,-0.412599258349398,0.14436832227506,-1.16507785388489,-2.16782049922428,0.24318371493798,0.258954871320764,-0.151966534521183],:scale)
86
- # y=Statsample::Vector.new([0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1],:scale)
87
- # x=Statsample::Dataset.new({"i"=>intercept,"x1"=>x1,"x2"=>x2})
88
- # obj = Statsample::Regression.glm(x, y, :binomial)
89
- # #=> Logistic Regression object
90
- # obj.irwls
91
- # #=> Array of returned values
92
- # obj.coefficients
93
- # #=> named vector of coefficients
94
-
95
- def irwls
96
- x, y = @x, @y
97
- #calling irwls on Regression and passing equivalent methods in lambdas.
98
- #Ruby_level+=awesome!
99
- @coefficients, @se, @fit, @residuals, @df, @iter, @converged = Statsample::Regression.irwls(
100
- x,y, ->l,m{self.class.mu(l,m)}, ->l,m{self.class.w(l,m)},
101
- ->l,m{self.class.j(l,m)}, ->k,l,m{self.class.h(k,l,m)}
102
- )
103
- end
104
- end
105
-
106
- end
107
- end
108
- end
data/lib/statsample-glm/regression/poisson.rb DELETED
@@ -1,90 +0,0 @@
1
- module Statsample
2
- module Regression
3
- module GLM
4
-
5
- class Poisson
6
-
7
- attr_reader :se
8
- # The fitted mean values
9
- attr_reader :fit
10
- # the _working_ residuals; that is the residuals in the final iteration of the IRWLS fit.
11
- attr_reader :residuals
12
- # The residuals degree of freedom
13
- attr_reader :df
14
- # Number of iterations used for convergence
15
- attr_reader :iter
16
- # Boolean. Tells whether the IRWLS for the given model converged or not
17
- attr_reader :converged
18
-
19
- def initialize(ds, y)
20
- @ds=ds
21
- @fields=@ds.fields
22
- @x = ds.to_matrix
23
- @y = y
24
- end
25
-
26
- # named vector/hash of coefficients
27
- # === Parameter
28
- # * *type*: symbol; (:array, default). Options = [:array, :hash]
29
- def coefficients(type=:array)
30
- if type==:array
31
- @coefficients
32
- elsif type==:hash
33
- h={}
34
- @fields.size.times {|i|
35
- h[@fields[i]]=@coefficients[i]
36
- }
37
- h
38
- end
39
- end
40
-
41
- def self.mu(x, b, link=:log)
42
- if link.downcase.to_sym == :log
43
- (x * b).map { |y| Math.exp(y) }
44
- elsif link.downcase.to_sym == :sqrt
45
- (x * b).collect { |y| y**2 }
46
- end
47
- end
48
-
49
- def self.w(x, b)
50
- poisson_mu = mu(x,b)
51
- mu_flat = poisson_mu.column_vectors.map(&:to_a).flatten
52
-
53
- w_mat = Matrix.I(mu_flat.size)
54
- mu_enum = mu_flat.to_enum
55
- return w_mat.map do |x|
56
- x.eql?(1) ? mu_enum.next : x
57
- end
58
- end
59
-
60
- def self.h(x, b, y)
61
- x_t = x.transpose
62
- mu_flat = mu(x,b).column_vectors.map(&:to_a).flatten
63
- column_data = y.zip(mu_flat).collect { |x| x.inject(:-) }
64
- x_t * Matrix.columns([column_data])
65
- end
66
-
67
- def self.j(x, b)
68
- w_matrix = w(x, b)
69
- jacobian_matrix = x.transpose * w_matrix * x
70
- jacobian_matrix.map { |x| -x }
71
- end
72
-
73
- def to_s
74
- sprintf("Logistic Regression (Statsample::Regression::GLM;:Logistic)")
75
- end
76
-
77
- def irwls
78
- x,y = @x,@y
79
- #calling irwls on Regression and passing equivalent methods in lambdas.
80
- #Ruby_level+=awesome!
81
- @coefficients, @se, @fit, @residuals, @df, @iter, @converged = Statsample::Regression.irwls(
82
- x,y, ->l,m{self.class.mu(l,m)}, ->l,m{self.class.w(l,m)},
83
- ->l,m{self.class.j(l,m)}, ->k,l,m{self.class.h(k,l,m)}
84
- )
85
- end
86
-
87
- end
88
- end
89
- end
90
- end
data/test/helper.rb DELETED
@@ -1,87 +0,0 @@
1
- require 'rubygems'
2
- require 'bundler'
3
- begin
4
- Bundler.setup(:default, :development)
5
- rescue Bundler::BundlerError => e
6
- $stderr.puts e.message
7
- $stderr.puts "Run `bundle install` to install missing gems"
8
- exit e.status_code
9
- end
10
- require 'minitest/unit'
11
- require 'shoulda'
12
- require 'shoulda-context'
13
- require 'mocha/setup'
14
-
15
-
16
- $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
17
- $LOAD_PATH.unshift(File.dirname(__FILE__))
18
- require 'statsample-glm'
19
- module MiniTest
20
- class Unit
21
- class TestCase
22
- include Shoulda::Context::Assertions
23
- include Shoulda::Context::InstanceMethods
24
- extend Shoulda::Context::ClassMethods
25
- def self.should_with_gsl(name,&block)
26
- should(name) do
27
- if Statsample.has_gsl?
28
- instance_eval(&block)
29
- else
30
- skip("Requires GSL")
31
- end
32
- end
33
- end
34
- end
35
- end
36
-
37
- module Assertions
38
- def assert_similar_vector(exp, obs, delta=1e-10,msg=nil)
39
- msg||="Different vectors #{exp} - #{obs}"
40
- assert_equal(exp.size, obs.size)
41
- exp.data_with_nils.each_with_index {|v,i|
42
- assert_in_delta(v,obs[i],delta)
43
- }
44
- end
45
- def assert_similar_hash(exp, obs, delta=1e-10,msg=nil)
46
- msg||="Different hash #{exp} - #{obs}"
47
- assert_equal(exp.size, obs.size)
48
- exp.each_key {|k|
49
- assert_in_delta(exp[k],obs[k],delta)
50
- }
51
- end
52
-
53
- def assert_equal_vector(exp,obs,delta=1e-10,msg=nil)
54
- assert_equal(exp.size, obs.size, "Different size.#{msg}")
55
- exp.size.times {|i|
56
- assert_in_delta(exp[i],obs[i],delta, "Different element #{i}. \nExpected:\n#{exp}\nObserved:\n#{obs}.#{msg}")
57
- }
58
- end
59
- def assert_equal_matrix(exp,obs,delta=1e-10,msg=nil)
60
- assert_equal(exp.row_size, obs.row_size, "Different row size.#{msg}")
61
- assert_equal(exp.column_size, obs.column_size, "Different column size.#{msg}")
62
- exp.row_size.times {|i|
63
- exp.column_size.times {|j|
64
- assert_in_delta(exp[i,j],obs[i,j], delta, "Different element #{i},#{j}\nExpected:\n#{exp}\nObserved:\n#{obs}.#{msg}")
65
- }
66
- }
67
- end
68
- alias :assert_raise :assert_raises unless method_defined? :assert_raise
69
- alias :assert_not_equal :refute_equal unless method_defined? :assert_not_equal
70
- alias :assert_not_same :refute_same unless method_defined? :assert_not_same
71
- unless method_defined? :assert_nothing_raised
72
- def assert_nothing_raised(msg=nil)
73
- msg||="Nothing should be raised, but raised %s"
74
- begin
75
- yield
76
- not_raised=true
77
- rescue Exception => e
78
- not_raised=false
79
- msg=sprintf(msg,e)
80
- end
81
- assert(not_raised,msg)
82
- end
83
- end
84
- end
85
- end
86
-
87
- MiniTest::Unit.autorun
data/test/test_glm.rb DELETED
@@ -1,4 +0,0 @@
1
- require(File.expand_path(File.dirname(__FILE__)+'/helper.rb'))
2
-
3
- class StatsampleRegressionGlm < MiniTest::Unit::TestCase
4
- end
data/test/test_glm_logistic.rb DELETED
@@ -1,23 +0,0 @@
1
- require(File.expand_path(File.dirname(__FILE__)+'/helper.rb'))
2
-
3
- class StatsampleRegressionGlmLogistic < MiniTest::Unit::TestCase
4
-
5
- context("Example") do
6
- setup do
7
- x1=Statsample::Vector.new([0.537322309644812,-0.717124209978434,-0.519166718891331,0.434970973986765,-0.761822002215759,1.51170030921189,0.883854199811195,-0.908689798854196,1.70331977539793,-0.246971150634099,-1.59077593922623,-0.721548040910253,0.467025703920194,-0.510132788447137,0.430106510266798,-0.144353683251536,-1.54943800728303,0.849307651309298,-0.640304240933579,1.31462478279425,-0.399783455165345,0.0453055645017902,-2.58212161987746,-1.16484414309359,-1.08829266466281,-0.243893919684792,-1.96655661929441,0.301335373291024,-0.665832694463588,-0.0120650855753837,1.5116066367604,0.557300353673344,1.12829931872045,0.234443748015922,-2.03486690662651,0.275544751380246,-0.231465849558696,-0.356880153225012,-0.57746647541923,1.35758352580655,1.23971669378224,-0.662466275100489,0.313263561921793,-1.08783223256362,1.41964722846899,1.29325100940785,0.72153880625103,0.440580131022748,0.0351917814720056, -0.142353224879252],:scale)
8
- x2=Statsample::Vector.new([-0.866655707911859,-0.367820249977585,0.361486610435,0.857332626245179,0.133438466268095,0.716104533073575,1.77206093023382,-0.10136697295802,-0.777086491435508,-0.204573554913706,0.963353531412233,-1.10103024900542,-0.404372761837392,-0.230226345183469,0.0363730246866971,-0.838265540390497,1.12543549657924,-0.57929175648001,-0.747060244805248,0.58946979365152,-0.531952663697324,1.53338594419818,0.521992029051441,1.41631763288724,0.611402316795129,-0.518355638373296,-0.515192557101107,-0.672697937866108,1.84347042325327,-0.21195540664804,-0.269869371631611,0.296155694010096,-2.18097898069634,-1.21314663927206,1.49193669881581,1.38969280369493,-0.400680808117106,-1.87282814976479,1.82394870451051,0.637864732838274,-0.141155946382493,0.0699950644281617,1.32568550595165,-0.412599258349398,0.14436832227506,-1.16507785388489,-2.16782049922428,0.24318371493798,0.258954871320764,-0.151966534521183],:scale)
9
- @y_log=Statsample::Vector.new([0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1],:scale)
10
- @y_pois=Statsample::Vector.new([1,2,1,3,3,1,10,1,1,2,15,0,0,2,1,2,18,2,1,1,1,8,18,13,7,1,1,0,26,0,2,2,0,0,25,7,0,0,21,0,0,1,5,0,3,0,0,1,0,0],:scale)
11
- intercept=Statsample::Vector.new([1]*50,:scale)
12
- @df=Statsample::Dataset.new({"i"=>intercept,"x1"=>x1,"x2"=>x2})
13
- @glm=Statsample::Regression.glm(@df,@y_log,:binomial)
14
- end
15
- should "report correct coefficientes as array" do
16
- assert_similar_vector(@glm.coefficients,[0.675603176233325,-0.312493754568903,2.28671333346264])
17
- end
18
- should "report correct coefficientes as hash" do
19
- assert_similar_hash(@glm.coefficients(:hash), {"i"=>0.675603176233325,"x1"=>-0.312493754568903,"x2"=>2.28671333346264})
20
- end
21
-
22
- end
23
- end
data/test/test_glm_poisson.rb DELETED
@@ -1,25 +0,0 @@
1
- require(File.expand_path(File.dirname(__FILE__)+'/helper.rb'))
2
-
3
- class StatsampleRegressionGlmPoisson < MiniTest::Unit::TestCase
4
-
5
- context("Example") do
6
- setup do
7
- x1=Statsample::Vector.new([0.537322309644812,-0.717124209978434,-0.519166718891331,0.434970973986765,-0.761822002215759,1.51170030921189,0.883854199811195,-0.908689798854196,1.70331977539793,-0.246971150634099,-1.59077593922623,-0.721548040910253,0.467025703920194,-0.510132788447137,0.430106510266798,-0.144353683251536,-1.54943800728303,0.849307651309298,-0.640304240933579,1.31462478279425,-0.399783455165345,0.0453055645017902,-2.58212161987746,-1.16484414309359,-1.08829266466281,-0.243893919684792,-1.96655661929441,0.301335373291024,-0.665832694463588,-0.0120650855753837,1.5116066367604,0.557300353673344,1.12829931872045,0.234443748015922,-2.03486690662651,0.275544751380246,-0.231465849558696,-0.356880153225012,-0.57746647541923,1.35758352580655,1.23971669378224,-0.662466275100489,0.313263561921793,-1.08783223256362,1.41964722846899,1.29325100940785,0.72153880625103,0.440580131022748,0.0351917814720056, -0.142353224879252],:scale)
8
- x2=Statsample::Vector.new([-0.866655707911859,-0.367820249977585,0.361486610435,0.857332626245179,0.133438466268095,0.716104533073575,1.77206093023382,-0.10136697295802,-0.777086491435508,-0.204573554913706,0.963353531412233,-1.10103024900542,-0.404372761837392,-0.230226345183469,0.0363730246866971,-0.838265540390497,1.12543549657924,-0.57929175648001,-0.747060244805248,0.58946979365152,-0.531952663697324,1.53338594419818,0.521992029051441,1.41631763288724,0.611402316795129,-0.518355638373296,-0.515192557101107,-0.672697937866108,1.84347042325327,-0.21195540664804,-0.269869371631611,0.296155694010096,-2.18097898069634,-1.21314663927206,1.49193669881581,1.38969280369493,-0.400680808117106,-1.87282814976479,1.82394870451051,0.637864732838274,-0.141155946382493,0.0699950644281617,1.32568550595165,-0.412599258349398,0.14436832227506,-1.16507785388489,-2.16782049922428,0.24318371493798,0.258954871320764,-0.151966534521183],:scale)
9
- @y_log=Statsample::Vector.new([0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1],:scale)
10
- @y_pois=Statsample::Vector.new([1,2,1,3,3,1,10,1,1,2,15,0,0,2,1,2,18,2,1,1,1,8,18,13,7,1,1,0,26,0,2,2,0,0,25,7,0,0,21,0,0,1,5,0,3,0,0,1,0,0],:scale)
11
- intercept=Statsample::Vector.new([1]*50,:scale)
12
- @df=Statsample::Dataset.new({"i"=>intercept,"x1"=>x1,"x2"=>x2})
13
- @glm=Statsample::Regression.glm(@df,@y_pois,:poisson)
14
-
15
- end
16
- should "report correct coefficientes as array" do
17
- assert_similar_vector(@glm.coefficients,[0.32993246633711,-0.586359358356708,1.28511323439258])
18
- end # should
19
- should "report correct coefficientes as hash" do
20
- assert_similar_hash(@glm.coefficients(:hash), {"i"=>0.32993246633711,"x1"=>-0.586359358356708, "x2"=>1.28511323439258})
21
- end # should
22
-
23
- end # context
24
- end # class
25
-