bio-statsample-glm 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/.travis.yml ADDED
@@ -0,0 +1,13 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.2
4
+ - 1.9.3
5
+ - 2.0.0
6
+ - jruby-19mode # JRuby in 1.9 mode
7
+ - rbx-19mode
8
+ # - 1.8.7
9
+ # - jruby-18mode # JRuby in 1.8 mode
10
+ # - rbx-18mode
11
+
12
+ # uncomment this line if your project needs to run something other than `rake`:
13
+ # script: bundle exec rspec spec
data/Gemfile ADDED
@@ -0,0 +1,20 @@
1
+ source "http://rubygems.org"
2
+
3
+ gem 'statsample', '>=1.2.0'
4
+ # Add dependencies required to use your gem here.
5
+ # Example:
6
+ gem "activesupport", "= 3.2.10"
7
+
8
+ # Add dependencies to develop your gem here.
9
+ # Include everything needed to run rake, tests, features, etc.
10
+ group :development do
11
+ gem "shoulda", ">= 0"
12
+ gem "rdoc", "~> 3.12"
13
+ gem "minitest", "~> 4.7.5"
14
+ gem "cucumber", ">= 0"
15
+ gem "bundler", "~> 1.3.5"
16
+ gem "jeweler", "~> 1.8.4"
17
+ gem "bio", ">= 1.4.2"
18
+ gem "rdoc", "~> 3.12"
19
+ gem "mocha", "~> 0.14.0"
20
+ end
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2013 Ankur Goel
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,51 @@
1
+ # bio-statsample-glm
2
+
3
+ [![Build Status](https://secure.travis-ci.org/AnkurGel/bioruby-statsample-glm.png)](http://travis-ci.org/AnkurGel/bioruby-statsample-glm)
4
+
5
+ Statsample-GLM is an extension to [Statsample](https://github.com/clbustos/statsample), a suite of advanced statistics in Ruby. It includes modules for various regression techniques such as:
6
+
7
+ * Poisson Regression
8
+ * Logistic Regression
9
+ * Exponential Regression
10
+
11
+ Note: this software is under active development!
12
+
13
+ ## Installation
14
+
15
+ ```sh
16
+ gem install bio-statsample-glm
17
+ ```
18
+
19
+ ## Usage
20
+
21
+ ```ruby
22
+ require 'bio-statsample-glm'
23
+ ```
24
+
25
+ The API doc is online. For more code examples see the test files in
26
+ the source tree.
27
+
28
+ ## Project home page
29
+
30
+ For information on the source tree, documentation, examples, issues and
31
+ how to contribute, see
32
+
33
+ http://github.com/AnkurGel/bioruby-statsample-glm
34
+
35
+ The BioRuby community is on IRC server: irc.freenode.org, channel: #bioruby.
36
+
37
+ ## Cite
38
+
39
+ If you use this software, please cite one of
40
+
41
+ * [BioRuby: bioinformatics software for the Ruby programming language](http://dx.doi.org/10.1093/bioinformatics/btq475)
42
+ * [Biogem: an effective tool-based approach for scaling up open source software development in bioinformatics](http://dx.doi.org/10.1093/bioinformatics/bts080)
43
+
44
+ ## Biogems.info
45
+
46
+ This Biogem is published at [biogems.info](http://biogems.info/index.html#bio-statsample-glm)
47
+
48
+ ## Copyright
49
+
50
+ Copyright (c) 2013 Ankur Goel. See LICENSE.txt for further details.
51
+
data/README.rdoc ADDED
@@ -0,0 +1,48 @@
1
+ = bio-statsample-glm
2
+
3
+ {<img
4
+ src="https://secure.travis-ci.org/ankurgel/bioruby-statsample-glm.png"
5
+ />}[http://travis-ci.org/#!/ankurgel/bioruby-statsample-glm]
6
+
7
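+ Statsample-GLM is an extension to Statsample[https://github.com/clbustos/statsample], a suite of advanced statistics in Ruby. It provides generalized linear models, including Poisson and logistic regression.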
8
+
9
+ Note: this software is under active development!
10
+
11
+ == Installation
12
+
13
+ gem install bio-statsample-glm
14
+
15
+ == Usage
16
+
17
+ == Developers
18
+
19
+ To use the library
20
+
21
+ require 'bio-statsample-glm'
22
+
23
+ The API doc is online. For more code examples see also the test files in
24
+ the source tree.
25
+
26
+ == Project home page
27
+
28
+ For information on the source tree, documentation, issues and how to contribute, see
29
+
30
+ http://github.com/ankurgel/bioruby-statsample-glm
31
+
32
+ The BioRuby community is on IRC server: irc.freenode.org, channel: #bioruby.
33
+
34
+ == Cite
35
+
36
+ If you use this software, please cite one of
37
+
38
+ * {BioRuby: bioinformatics software for the Ruby programming language}[http://dx.doi.org/10.1093/bioinformatics/btq475]
39
+ * {Biogem: an effective tool-based approach for scaling up open source software development in bioinformatics}[http://dx.doi.org/10.1093/bioinformatics/bts080]
40
+
41
+ == Biogems.info
42
+
43
+ This Biogem is published at http://biogems.info/index.html#bio-statsample-glm
44
+
45
+ == Copyright
46
+
47
+ Copyright (c) 2013 Ankur Goel. See LICENSE.txt for further details.
48
+
data/Rakefile ADDED
@@ -0,0 +1,48 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'bundler'
5
+ begin
6
+ Bundler.setup(:default, :development)
7
+ rescue Bundler::BundlerError => e
8
+ $stderr.puts e.message
9
+ $stderr.puts "Run `bundle install` to install missing gems"
10
+ exit e.status_code
11
+ end
12
+ require 'rake'
13
+
14
+ require 'jeweler'
15
+ Jeweler::Tasks.new do |gem|
16
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
17
+ gem.name = "bio-statsample-glm"
18
+ gem.homepage = "http://github.com/AnkurGel/bioruby-statsample-glm"
19
+ gem.license = "MIT"
20
+ gem.summary = %Q{Generalized Linear Models for Statsample}
21
+ gem.description = %Q{Statsample-GLM is an extension to Statsample, an advance statistics suite in Ruby. This gem includes modules for Regression techniques such as Poisson Regression, Logistic Regression and Exponential Regression}
22
+ gem.email = "ankurgel@gmail.com"
23
+ gem.authors = ["Ankur Goel"]
24
+ # dependencies defined in Gemfile
25
+ end
26
+ Jeweler::RubygemsDotOrgTasks.new
27
+
28
+ require 'rake/testtask'
29
+ Rake::TestTask.new(:test) do |test|
30
+ test.libs << 'lib' << 'test'
31
+ test.pattern = 'test/**/test_*.rb'
32
+ test.verbose = true
33
+ end
34
+
35
+ require 'cucumber/rake/task'
36
+ Cucumber::Rake::Task.new(:features)
37
+
38
+ task :default => :test
39
+
40
+ require 'rdoc/task'
41
+ Rake::RDocTask.new do |rdoc|
42
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
43
+
44
+ rdoc.rdoc_dir = 'rdoc'
45
+ rdoc.title = "bio-statsample-glm #{version}"
46
+ rdoc.rdoc_files.include('README*')
47
+ rdoc.rdoc_files.include('lib/**/*.rb')
48
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,9 @@
1
+ Feature: something something
2
+ In order to something something
3
+ A user something something
4
+ something something something
5
+
6
+ Scenario: something something
7
+ Given inspiration
8
+ When I create a sweet new gem
9
+ Then everyone should see how awesome I am
@@ -0,0 +1,15 @@
1
+ require 'bundler'
2
+ begin
3
+ Bundler.setup(:default, :development)
4
+ rescue Bundler::BundlerError => e
5
+ $stderr.puts e.message
6
+ $stderr.puts "Run `bundle install` to install missing gems"
7
+ exit e.status_code
8
+ end
9
+
10
+ $LOAD_PATH.unshift(File.dirname(__FILE__) + '/../../lib')
11
+ require 'bio-statsample-glm'
12
+
13
+ require 'test/unit/assertions'
14
+
15
+ World(Test::Unit::Assertions)
@@ -0,0 +1,12 @@
1
+ # Please require your code below, respecting the naming conventions in the
2
+ # bioruby directory tree.
3
+ #
4
+ # For example, say you have a plugin named bio-plugin, the only uncommented
5
+ # line in this file would be
6
+ #
7
+ # require 'bio/bio-plugin/plugin'
8
+ #
9
+ # In this file only require other files. Avoid other source code.
10
+
11
+ require 'statsample'
12
+ require 'bio-statsample-glm/regression'
@@ -0,0 +1,66 @@
1
+ require 'bio-statsample-glm/regression/poisson'
2
+ require 'bio-statsample-glm/regression/logistic'
3
+ module Statsample
4
+ module Regression
5
+ include Statsample::VectorShorthands
6
+
7
+ # = Generalized linear models
8
+ # == Parameters
9
+ #
10
+ # * x = model matrix
11
+ # * y = response vector
12
+ # * method = symbol; choice of glm strategy, default = :poisson
13
+ #
14
+ # == Usage
15
+ # require 'bio-statsample-glm'
16
+ # x1=Statsample::Vector.new([0.537322309644812,-0.717124209978434,-0.519166718891331,0.434970973986765,-0.761822002215759,1.51170030921189,0.883854199811195,-0.908689798854196,1.70331977539793,-0.246971150634099,-1.59077593922623,-0.721548040910253,0.467025703920194,-0.510132788447137,0.430106510266798,-0.144353683251536,-1.54943800728303,0.849307651309298,-0.640304240933579,1.31462478279425,-0.399783455165345,0.0453055645017902,-2.58212161987746,-1.16484414309359,-1.08829266466281,-0.243893919684792,-1.96655661929441,0.301335373291024,-0.665832694463588,-0.0120650855753837,1.5116066367604,0.557300353673344,1.12829931872045,0.234443748015922,-2.03486690662651,0.275544751380246,-0.231465849558696,-0.356880153225012,-0.57746647541923,1.35758352580655,1.23971669378224,-0.662466275100489,0.313263561921793,-1.08783223256362,1.41964722846899,1.29325100940785,0.72153880625103,0.440580131022748,0.0351917814720056, -0.142353224879252],:scale)
17
+ # x2=Statsample::Vector.new([-0.866655707911859,-0.367820249977585,0.361486610435,0.857332626245179,0.133438466268095,0.716104533073575,1.77206093023382,-0.10136697295802,-0.777086491435508,-0.204573554913706,0.963353531412233,-1.10103024900542,-0.404372761837392,-0.230226345183469,0.0363730246866971,-0.838265540390497,1.12543549657924,-0.57929175648001,-0.747060244805248,0.58946979365152,-0.531952663697324,1.53338594419818,0.521992029051441,1.41631763288724,0.611402316795129,-0.518355638373296,-0.515192557101107,-0.672697937866108,1.84347042325327,-0.21195540664804,-0.269869371631611,0.296155694010096,-2.18097898069634,-1.21314663927206,1.49193669881581,1.38969280369493,-0.400680808117106,-1.87282814976479,1.82394870451051,0.637864732838274,-0.141155946382493,0.0699950644281617,1.32568550595165,-0.412599258349398,0.14436832227506,-1.16507785388489,-2.16782049922428,0.24318371493798,0.258954871320764,-0.151966534521183],:scale)
18
+ # y=Statsample::Vector.new([0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1],:scale)
19
+     #    x=Statsample::Dataset.new({"i"=>Statsample::Vector.new([1]*50,:scale),"x1"=>x1,"x2"=>x2}).to_matrix
20
+ # obj = Statsample::Regression.glm(x, y, :binomial)
21
+ # #=> Logistic Regression object
22
+ #
23
+ # == Returns
24
+ # GLM object for given method.
25
+ def self.glm(x, y, method=:poisson)
26
+ if method.downcase.to_sym == :poisson
27
+ obj = Statsample::Regression::GLM::Poisson.new(x,y)
28
+ elsif method.downcase.to_sym == :binomial
29
+ obj = Statsample::Regression::GLM::Logistic.new(x,y)
30
+ end
31
+ obj
32
+ #now, #irwls method is available to be called on returned obj
33
+ end
34
+
35
+
36
+ def self.irwls(x, y, mu, w, j, h, epsilon = 1e-7, max_iter = 100)
37
+ b = Matrix.column_vector(Array.new(x.column_size,0.0))
38
+ converged = false
39
+ 1.upto(max_iter) do |i|
40
+ #conversion from : (solve(j(x,b)) %*% h(x,b,y))
41
+
42
+ intermediate = (j.call(x,b).inverse * h.call(x,b,y))
43
+ b_new = b - intermediate
44
+
45
+ if((b_new - b).map(&:abs)).to_a.flatten.inject(:+) < epsilon
46
+ converged = true
47
+ b = b_new
48
+ break
49
+ end
50
+ b = b_new
51
+ end
52
+ ss = j.call(x,b).inverse.diagonal.map{ |x| -x}.map{ |y| Math.sqrt(y) }
53
+ values = mu.call(x,b)
54
+
55
+ residuals = y - values.column_vectors.map(&:to_a).flatten
56
+ df_residuals = y.count - x.column_size
57
+ return [create_vector(b.column_vectors[0]), create_vector(ss), create_vector(values.to_a.flatten),
58
+ residuals, max_iter, df_residuals, converged]
59
+ end
60
+
61
+ private
62
+ def self.create_vector(arr)
63
+ Statsample::Vector.new(arr, :scale)
64
+ end
65
+ end
66
+ end
@@ -0,0 +1,94 @@
1
+ module Statsample
2
+ module Regression
3
+ module GLM
4
+
5
+ class Logistic
6
+
7
+ # a named vector of coefficients
8
+ attr_reader :coefficients
9
+         # Standard errors of the coefficient estimates
10
+ attr_reader :se
11
+ # The fitted mean values
12
+ attr_reader :fit
13
+ # the _working_ residuals; that is the residuals in the final iteration of the IRWLS fit.
14
+ attr_reader :residuals
15
+ # The residuals degree of freedom
16
+ attr_reader :df
17
+ # Number of iterations used for convergence
18
+ attr_reader :iter
19
+ # Boolean. Tells whether the IRWLS for the given model converged or not
20
+ attr_reader :converged
21
+
22
+ def initialize(x, y)
23
+ @x = x
24
+ @y = y
25
+ end
26
+
27
+ def self.mu(x, b)
28
+ matrix_mul = x * b
29
+ numerator = matrix_mul.map { |y| Math.exp(y) }
30
+ denominator = numerator.map { |y| 1 + y }
31
+
32
+ numerator.each_with_index { |e, r, c|
33
+ numerator[r,c] = numerator[r,c].to_f / denominator[r,c].to_f
34
+ }
35
+ end
36
+
37
+ def self.w(x, b)
38
+ mus = mu(x,b).column_vectors.map(&:to_a).flatten
39
+ mus_intermediate = mus.collect { |x| 1 - x }
40
+ w = mus.zip(mus_intermediate).collect { |x| x.inject(:*) }
41
+ w_mat = Matrix.I(w.size)
42
+ w_enum = w.to_enum
43
+ return w_mat.map do |x|
44
+ x.eql?(1) ? w_enum.next : x
45
+ end
46
+ end
47
+
48
+ def self.h(x,b,y)
49
+ x_t = x.transpose
50
+ mu_flat = mu(x,b).column_vectors.map(&:to_a).flatten
51
+ column_data = y.zip(mu_flat).collect { |x| x.inject(:-) }
52
+ x_t * Matrix.column_vector(column_data)
53
+ end
54
+
55
+ def self.j(x,b)
56
+ w_matrix = w(x, b)
57
+ jacobian_matrix = x.transpose * w_matrix * x
58
+ jacobian_matrix.map { |x| -x }
59
+ end
60
+
61
+ def to_s
62
+ sprintf("Logistic Regression (Statsample::Regression::GLM;:Logistic)")
63
+ end
64
+
65
+ # = Iteratively reweighted least squares
66
+ # Computes irwls for given model and parameters.
67
+ #
68
+ # == Usage
69
+ # require 'bio-statsample-glm'
70
+ # x1=Statsample::Vector.new([0.537322309644812,-0.717124209978434,-0.519166718891331,0.434970973986765,-0.761822002215759,1.51170030921189,0.883854199811195,-0.908689798854196,1.70331977539793,-0.246971150634099,-1.59077593922623,-0.721548040910253,0.467025703920194,-0.510132788447137,0.430106510266798,-0.144353683251536,-1.54943800728303,0.849307651309298,-0.640304240933579,1.31462478279425,-0.399783455165345,0.0453055645017902,-2.58212161987746,-1.16484414309359,-1.08829266466281,-0.243893919684792,-1.96655661929441,0.301335373291024,-0.665832694463588,-0.0120650855753837,1.5116066367604,0.557300353673344,1.12829931872045,0.234443748015922,-2.03486690662651,0.275544751380246,-0.231465849558696,-0.356880153225012,-0.57746647541923,1.35758352580655,1.23971669378224,-0.662466275100489,0.313263561921793,-1.08783223256362,1.41964722846899,1.29325100940785,0.72153880625103,0.440580131022748,0.0351917814720056, -0.142353224879252],:scale)
71
+ # x2=Statsample::Vector.new([-0.866655707911859,-0.367820249977585,0.361486610435,0.857332626245179,0.133438466268095,0.716104533073575,1.77206093023382,-0.10136697295802,-0.777086491435508,-0.204573554913706,0.963353531412233,-1.10103024900542,-0.404372761837392,-0.230226345183469,0.0363730246866971,-0.838265540390497,1.12543549657924,-0.57929175648001,-0.747060244805248,0.58946979365152,-0.531952663697324,1.53338594419818,0.521992029051441,1.41631763288724,0.611402316795129,-0.518355638373296,-0.515192557101107,-0.672697937866108,1.84347042325327,-0.21195540664804,-0.269869371631611,0.296155694010096,-2.18097898069634,-1.21314663927206,1.49193669881581,1.38969280369493,-0.400680808117106,-1.87282814976479,1.82394870451051,0.637864732838274,-0.141155946382493,0.0699950644281617,1.32568550595165,-0.412599258349398,0.14436832227506,-1.16507785388489,-2.16782049922428,0.24318371493798,0.258954871320764,-0.151966534521183],:scale)
72
+ # y=Statsample::Vector.new([0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1],:scale)
73
+         #    x=Statsample::Dataset.new({"i"=>Statsample::Vector.new([1]*50,:scale),"x1"=>x1,"x2"=>x2}).to_matrix
74
+ # obj = Statsample::Regression.glm(x, y, :binomial)
75
+ # #=> Logistic Regression object
76
+         #    obj.irwls
77
+ # #=> Array of returned values
78
+ # obj.coefficients
79
+ # #=> named vector of coefficients
80
+
81
+ def irwls
82
+ x, y = @x, @y
83
+ #calling irwls on Regression and passing equivalent methods in lambdas.
84
+ #Ruby_level+=awesome!
85
+ @coefficients, @se, @fit, @residuals, @df, @iter, @converged = Statsample::Regression.irwls(
86
+ x,y, ->l,m{self.class.mu(l,m)}, ->l,m{self.class.w(l,m)},
87
+ ->l,m{self.class.j(l,m)}, ->k,l,m{self.class.h(k,l,m)}
88
+ )
89
+ end
90
+ end
91
+
92
+ end
93
+ end
94
+ end
@@ -0,0 +1,76 @@
1
+ module Statsample
2
+ module Regression
3
+ module GLM
4
+
5
+ class Poisson
6
+
7
+ # a named vector of coefficients
8
+ attr_reader :coefficients
9
+         # Standard errors of the coefficient estimates
10
+ attr_reader :se
11
+ # The fitted mean values
12
+ attr_reader :fit
13
+ # the _working_ residuals; that is the residuals in the final iteration of the IRWLS fit.
14
+ attr_reader :residuals
15
+ # The residuals degree of freedom
16
+ attr_reader :df
17
+ # Number of iterations used for convergence
18
+ attr_reader :iter
19
+ # Boolean. Tells whether the IRWLS for the given model converged or not
20
+ attr_reader :converged
21
+
22
+ def initialize(x, y)
23
+ @x = x
24
+ @y = y
25
+ end
26
+
27
+ def self.mu(x, b, link=:log)
28
+ if link.downcase.to_sym == :log
29
+ (x * b).map { |y| Math.exp(y) }
30
+ elsif link.downcase.to_sym == :sqrt
31
+ (x * b).collect { |y| y**2 }
32
+ end
33
+ end
34
+
35
+ def self.w(x, b)
36
+ poisson_mu = mu(x,b)
37
+ mu_flat = poisson_mu.column_vectors.map(&:to_a).flatten
38
+
39
+ w_mat = Matrix.I(mu_flat.size)
40
+ mu_enum = mu_flat.to_enum
41
+ return w_mat.map do |x|
42
+ x.eql?(1) ? mu_enum.next : x
43
+ end
44
+ end
45
+
46
+ def self.h(x, b, y)
47
+ x_t = x.transpose
48
+ mu_flat = mu(x,b).column_vectors.map(&:to_a).flatten
49
+ column_data = y.zip(mu_flat).collect { |x| x.inject(:-) }
50
+ x_t * Matrix.columns([column_data])
51
+ end
52
+
53
+ def self.j(x, b)
54
+ w_matrix = w(x, b)
55
+ jacobian_matrix = x.transpose * w_matrix * x
56
+ jacobian_matrix.map { |x| -x }
57
+ end
58
+
59
+ def to_s
60
+ sprintf("Logistic Regression (Statsample::Regression::GLM;:Logistic)")
61
+ end
62
+
63
+ def irwls
64
+ x,y = @x,@y
65
+ #calling irwls on Regression and passing equivalent methods in lambdas.
66
+ #Ruby_level+=awesome!
67
+ @coefficients, @se, @fit, @residuals, @df, @iter, @converged = Statsample::Regression.irwls(
68
+ x,y, ->l,m{self.class.mu(l,m)}, ->l,m{self.class.w(l,m)},
69
+ ->l,m{self.class.j(l,m)}, ->k,l,m{self.class.h(k,l,m)}
70
+ )
71
+ end
72
+
73
+ end
74
+ end
75
+ end
76
+ end
data/test/helper.rb ADDED
@@ -0,0 +1,79 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ begin
4
+ Bundler.setup(:default, :development)
5
+ rescue Bundler::BundlerError => e
6
+ $stderr.puts e.message
7
+ $stderr.puts "Run `bundle install` to install missing gems"
8
+ exit e.status_code
9
+ end
10
+ require 'minitest/unit'
11
+ require 'shoulda'
12
+ require 'shoulda-context'
13
+ require 'mocha/setup'
14
+
15
+
16
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
17
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
18
+ require 'bio-statsample-glm'
19
+ module MiniTest
20
+ class Unit
21
+ class TestCase
22
+ include Shoulda::Context::Assertions
23
+ include Shoulda::Context::InstanceMethods
24
+ extend Shoulda::Context::ClassMethods
25
+ def self.should_with_gsl(name,&block)
26
+ should(name) do
27
+ if Statsample.has_gsl?
28
+ instance_eval(&block)
29
+ else
30
+ skip("Requires GSL")
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
36
+
37
+ module Assertions
38
+ def assert_similar_vector(exp, obs, delta=1e-10,msg=nil)
39
+ msg||="Different vectors #{exp} - #{obs}"
40
+ assert_equal(exp.size, obs.size)
41
+ exp.data_with_nils.each_with_index {|v,i|
42
+ assert_in_delta(v,obs[i],delta)
43
+ }
44
+ end
45
+ def assert_equal_vector(exp,obs,delta=1e-10,msg=nil)
46
+ assert_equal(exp.size, obs.size, "Different size.#{msg}")
47
+ exp.size.times {|i|
48
+ assert_in_delta(exp[i],obs[i],delta, "Different element #{i}. \nExpected:\n#{exp}\nObserved:\n#{obs}.#{msg}")
49
+ }
50
+ end
51
+ def assert_equal_matrix(exp,obs,delta=1e-10,msg=nil)
52
+ assert_equal(exp.row_size, obs.row_size, "Different row size.#{msg}")
53
+ assert_equal(exp.column_size, obs.column_size, "Different column size.#{msg}")
54
+ exp.row_size.times {|i|
55
+ exp.column_size.times {|j|
56
+ assert_in_delta(exp[i,j],obs[i,j], delta, "Different element #{i},#{j}\nExpected:\n#{exp}\nObserved:\n#{obs}.#{msg}")
57
+ }
58
+ }
59
+ end
60
+ alias :assert_raise :assert_raises unless method_defined? :assert_raise
61
+ alias :assert_not_equal :refute_equal unless method_defined? :assert_not_equal
62
+ alias :assert_not_same :refute_same unless method_defined? :assert_not_same
63
+ unless method_defined? :assert_nothing_raised
64
+ def assert_nothing_raised(msg=nil)
65
+ msg||="Nothing should be raised, but raised %s"
66
+ begin
67
+ yield
68
+ not_raised=true
69
+ rescue Exception => e
70
+ not_raised=false
71
+ msg=sprintf(msg,e)
72
+ end
73
+ assert(not_raised,msg)
74
+ end
75
+ end
76
+ end
77
+ end
78
+
79
+ MiniTest::Unit.autorun
data/test/test_glm.rb ADDED
@@ -0,0 +1,37 @@
1
+ require(File.expand_path(File.dirname(__FILE__)+'/helper.rb'))
2
+
3
+ class StatsampleRegressionGlm < MiniTest::Unit::TestCase
4
+
5
+ context("Example") do
6
+ setup do
7
+ x1=Statsample::Vector.new([0.537322309644812,-0.717124209978434,-0.519166718891331,0.434970973986765,-0.761822002215759,1.51170030921189,0.883854199811195,-0.908689798854196,1.70331977539793,-0.246971150634099,-1.59077593922623,-0.721548040910253,0.467025703920194,-0.510132788447137,0.430106510266798,-0.144353683251536,-1.54943800728303,0.849307651309298,-0.640304240933579,1.31462478279425,-0.399783455165345,0.0453055645017902,-2.58212161987746,-1.16484414309359,-1.08829266466281,-0.243893919684792,-1.96655661929441,0.301335373291024,-0.665832694463588,-0.0120650855753837,1.5116066367604,0.557300353673344,1.12829931872045,0.234443748015922,-2.03486690662651,0.275544751380246,-0.231465849558696,-0.356880153225012,-0.57746647541923,1.35758352580655,1.23971669378224,-0.662466275100489,0.313263561921793,-1.08783223256362,1.41964722846899,1.29325100940785,0.72153880625103,0.440580131022748,0.0351917814720056, -0.142353224879252],:scale)
8
+ x2=Statsample::Vector.new([-0.866655707911859,-0.367820249977585,0.361486610435,0.857332626245179,0.133438466268095,0.716104533073575,1.77206093023382,-0.10136697295802,-0.777086491435508,-0.204573554913706,0.963353531412233,-1.10103024900542,-0.404372761837392,-0.230226345183469,0.0363730246866971,-0.838265540390497,1.12543549657924,-0.57929175648001,-0.747060244805248,0.58946979365152,-0.531952663697324,1.53338594419818,0.521992029051441,1.41631763288724,0.611402316795129,-0.518355638373296,-0.515192557101107,-0.672697937866108,1.84347042325327,-0.21195540664804,-0.269869371631611,0.296155694010096,-2.18097898069634,-1.21314663927206,1.49193669881581,1.38969280369493,-0.400680808117106,-1.87282814976479,1.82394870451051,0.637864732838274,-0.141155946382493,0.0699950644281617,1.32568550595165,-0.412599258349398,0.14436832227506,-1.16507785388489,-2.16782049922428,0.24318371493798,0.258954871320764,-0.151966534521183],:scale)
9
+ @y_log=Statsample::Vector.new([0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1],:scale)
10
+ @y_pois=Statsample::Vector.new([1,2,1,3,3,1,10,1,1,2,15,0,0,2,1,2,18,2,1,1,1,8,18,13,7,1,1,0,26,0,2,2,0,0,25,7,0,0,21,0,0,1,5,0,3,0,0,1,0,0],:scale)
11
+ intercept=Statsample::Vector.new([1]*50,:scale)
12
+ @df=Statsample::Dataset.new({"i"=>intercept,"x1"=>x1,"x2"=>x2})
13
+ end
14
+
15
+ context("Logistic") do
16
+ setup do
17
+ @glm=Statsample::Regression.glm(@df.to_matrix,@y_log,:binomial)
18
+ @glm.irwls
19
+ end
20
+
21
+ should "report correct coefficientes" do
22
+ assert_similar_vector(@glm.coefficients,[0.675603176233325,-0.312493754568903,2.28671333346264])
23
+ end
24
+ end
25
+
26
+ context("Poisson") do
27
+ setup do
28
+ @glm=Statsample::Regression.glm(@df.to_matrix,@y_pois,:poisson)
29
+ @glm.irwls
30
+ end
31
+
32
+ should "report correct coefficientes" do
33
+ assert_similar_vector(@glm.coefficients,[0.32993246633711,-0.586359358356708,1.28511323439258])
34
+ end
35
+ end
36
+ end
37
+ end
metadata ADDED
@@ -0,0 +1,246 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bio-statsample-glm
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Ankur Goel
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-09-23 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: statsample
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 1.2.0
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 1.2.0
30
+ - !ruby/object:Gem::Dependency
31
+ name: activesupport
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - '='
36
+ - !ruby/object:Gem::Version
37
+ version: 3.2.10
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - '='
44
+ - !ruby/object:Gem::Version
45
+ version: 3.2.10
46
+ - !ruby/object:Gem::Dependency
47
+ name: shoulda
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: rdoc
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ~>
68
+ - !ruby/object:Gem::Version
69
+ version: '3.12'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ~>
76
+ - !ruby/object:Gem::Version
77
+ version: '3.12'
78
+ - !ruby/object:Gem::Dependency
79
+ name: minitest
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ~>
84
+ - !ruby/object:Gem::Version
85
+ version: 4.7.5
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ~>
92
+ - !ruby/object:Gem::Version
93
+ version: 4.7.5
94
+ - !ruby/object:Gem::Dependency
95
+ name: cucumber
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ type: :development
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ - !ruby/object:Gem::Dependency
111
+ name: bundler
112
+ requirement: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ~>
116
+ - !ruby/object:Gem::Version
117
+ version: 1.3.5
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ~>
124
+ - !ruby/object:Gem::Version
125
+ version: 1.3.5
126
+ - !ruby/object:Gem::Dependency
127
+ name: jeweler
128
+ requirement: !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ~>
132
+ - !ruby/object:Gem::Version
133
+ version: 1.8.4
134
+ type: :development
135
+ prerelease: false
136
+ version_requirements: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ~>
140
+ - !ruby/object:Gem::Version
141
+ version: 1.8.4
142
+ - !ruby/object:Gem::Dependency
143
+ name: bio
144
+ requirement: !ruby/object:Gem::Requirement
145
+ none: false
146
+ requirements:
147
+ - - ! '>='
148
+ - !ruby/object:Gem::Version
149
+ version: 1.4.2
150
+ type: :development
151
+ prerelease: false
152
+ version_requirements: !ruby/object:Gem::Requirement
153
+ none: false
154
+ requirements:
155
+ - - ! '>='
156
+ - !ruby/object:Gem::Version
157
+ version: 1.4.2
158
+ - !ruby/object:Gem::Dependency
159
+ name: rdoc
160
+ requirement: !ruby/object:Gem::Requirement
161
+ none: false
162
+ requirements:
163
+ - - ~>
164
+ - !ruby/object:Gem::Version
165
+ version: '3.12'
166
+ type: :development
167
+ prerelease: false
168
+ version_requirements: !ruby/object:Gem::Requirement
169
+ none: false
170
+ requirements:
171
+ - - ~>
172
+ - !ruby/object:Gem::Version
173
+ version: '3.12'
174
+ - !ruby/object:Gem::Dependency
175
+ name: mocha
176
+ requirement: !ruby/object:Gem::Requirement
177
+ none: false
178
+ requirements:
179
+ - - ~>
180
+ - !ruby/object:Gem::Version
181
+ version: 0.14.0
182
+ type: :development
183
+ prerelease: false
184
+ version_requirements: !ruby/object:Gem::Requirement
185
+ none: false
186
+ requirements:
187
+ - - ~>
188
+ - !ruby/object:Gem::Version
189
+ version: 0.14.0
190
+ description: Statsample-GLM is an extension to Statsample, an advance statistics suite
191
+ in Ruby. This gem includes modules for Regression techniques such as Poisson Regression,
192
+ Logistic Regression and Exponential Regression
193
+ email: ankurgel@gmail.com
194
+ executables: []
195
+ extensions: []
196
+ extra_rdoc_files:
197
+ - LICENSE.txt
198
+ - README.md
199
+ - README.rdoc
200
+ files:
201
+ - .document
202
+ - .travis.yml
203
+ - Gemfile
204
+ - LICENSE.txt
205
+ - README.md
206
+ - README.rdoc
207
+ - Rakefile
208
+ - VERSION
209
+ - features/bio-statsample-glm.feature
210
+ - features/step_definitions/bio-statsample-glm_steps.rb
211
+ - features/support/env.rb
212
+ - lib/bio-statsample-glm.rb
213
+ - lib/bio-statsample-glm/regression.rb
214
+ - lib/bio-statsample-glm/regression/logistic.rb
215
+ - lib/bio-statsample-glm/regression/poisson.rb
216
+ - test/helper.rb
217
+ - test/test_glm.rb
218
+ homepage: http://github.com/AnkurGel/bioruby-statsample-glm
219
+ licenses:
220
+ - MIT
221
+ post_install_message:
222
+ rdoc_options: []
223
+ require_paths:
224
+ - lib
225
+ required_ruby_version: !ruby/object:Gem::Requirement
226
+ none: false
227
+ requirements:
228
+ - - ! '>='
229
+ - !ruby/object:Gem::Version
230
+ version: '0'
231
+ segments:
232
+ - 0
233
+ hash: 797002845
234
+ required_rubygems_version: !ruby/object:Gem::Requirement
235
+ none: false
236
+ requirements:
237
+ - - ! '>='
238
+ - !ruby/object:Gem::Version
239
+ version: '0'
240
+ requirements: []
241
+ rubyforge_project:
242
+ rubygems_version: 1.8.25
243
+ signing_key:
244
+ specification_version: 3
245
+ summary: Generalized Linear Models for Statsample
246
+ test_files: []