bio-statsample-glm 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.travis.yml +13 -0
- data/Gemfile +20 -0
- data/LICENSE.txt +20 -0
- data/README.md +51 -0
- data/README.rdoc +48 -0
- data/Rakefile +48 -0
- data/VERSION +1 -0
- data/features/bio-statsample-glm.feature +9 -0
- data/features/step_definitions/bio-statsample-glm_steps.rb +0 -0
- data/features/support/env.rb +15 -0
- data/lib/bio-statsample-glm.rb +12 -0
- data/lib/bio-statsample-glm/regression.rb +66 -0
- data/lib/bio-statsample-glm/regression/logistic.rb +94 -0
- data/lib/bio-statsample-glm/regression/poisson.rb +76 -0
- data/test/helper.rb +79 -0
- data/test/test_glm.rb +37 -0
- metadata +246 -0
data/.document
ADDED
data/.travis.yml
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
language: ruby
|
2
|
+
rvm:
|
3
|
+
- 1.9.2
|
4
|
+
- 1.9.3
|
5
|
+
- 2.0.0
|
6
|
+
- jruby-19mode # JRuby in 1.9 mode
|
7
|
+
- rbx-19mode
|
8
|
+
# - 1.8.7
|
9
|
+
# - jruby-18mode # JRuby in 1.8 mode
|
10
|
+
# - rbx-18mode
|
11
|
+
|
12
|
+
# uncomment this line if your project needs to run something other than `rake`:
|
13
|
+
# script: bundle exec rspec spec
|
data/Gemfile
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
source "http://rubygems.org"
|
2
|
+
|
3
|
+
gem 'statsample', '>=1.2.0'
|
4
|
+
# Add dependencies required to use your gem here.
|
5
|
+
# Example:
|
6
|
+
gem "activesupport", "= 3.2.10"
|
7
|
+
|
8
|
+
# Add dependencies to develop your gem here.
|
9
|
+
# Include everything needed to run rake, tests, features, etc.
|
10
|
+
group :development do
|
11
|
+
gem "shoulda", ">= 0"
|
12
|
+
gem "rdoc", "~> 3.12"
|
13
|
+
gem "minitest", "~> 4.7.5"
|
14
|
+
gem "cucumber", ">= 0"
|
15
|
+
gem "bundler", "~> 1.3.5"
|
16
|
+
gem "jeweler", "~> 1.8.4"
|
17
|
+
gem "bio", ">= 1.4.2"
|
18
|
+
gem "rdoc", "~> 3.12"
|
19
|
+
gem "mocha", "~> 0.14.0"
|
20
|
+
end
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2013 Ankur Goel
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
# bio-statsample-glm
|
2
|
+
|
3
|
+
[![Build Status](https://secure.travis-ci.org/AnkurGel/bioruby-statsample-glm.png)](http://travis-ci.org/AnkurGel/bioruby-statsample-glm)
|
4
|
+
|
5
|
+
Statsample-GLM is an extension to [Statsample](https://github.com/clbustos/statsample), a suite of advanced statistics in Ruby. It includes modules for various regression techniques such as:
|
6
|
+
|
7
|
+
* Poisson Regression
|
8
|
+
* Logistic Regression
|
9
|
+
* Exponential Regression
|
10
|
+
|
11
|
+
Note: this software is under active development!
|
12
|
+
|
13
|
+
## Installation
|
14
|
+
|
15
|
+
```sh
|
16
|
+
gem install bio-statsample-glm
|
17
|
+
```
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
```ruby
|
22
|
+
require 'bio-statsample-glm'
|
23
|
+
```
|
24
|
+
|
25
|
+
The API doc is online. For more code examples see the test files in
|
26
|
+
the source tree.
|
27
|
+
|
28
|
+
## Project home page
|
29
|
+
|
30
|
+
For information on the source tree, documentation, examples, issues and
|
31
|
+
how to contribute, see
|
32
|
+
|
33
|
+
http://github.com/AnkurGel/bioruby-statsample-glm
|
34
|
+
|
35
|
+
The BioRuby community is on IRC server: irc.freenode.org, channel: #bioruby.
|
36
|
+
|
37
|
+
## Cite
|
38
|
+
|
39
|
+
If you use this software, please cite one of
|
40
|
+
|
41
|
+
* [BioRuby: bioinformatics software for the Ruby programming language](http://dx.doi.org/10.1093/bioinformatics/btq475)
|
42
|
+
* [Biogem: an effective tool-based approach for scaling up open source software development in bioinformatics](http://dx.doi.org/10.1093/bioinformatics/bts080)
|
43
|
+
|
44
|
+
## Biogems.info
|
45
|
+
|
46
|
+
This Biogem is published at [bio-statsample-glm](http://biogems.info/index.html#bio-statsample-glm)
|
47
|
+
|
48
|
+
## Copyright
|
49
|
+
|
50
|
+
Copyright (c) 2013 Ankur Goel. See LICENSE.txt for further details.
|
51
|
+
|
data/README.rdoc
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
= bio-statsample-glm
|
2
|
+
|
3
|
+
{<img
|
4
|
+
src="https://secure.travis-ci.org/ankurgel/bioruby-statsample-glm.png"
|
5
|
+
/>}[http://travis-ci.org/#!/ankurgel/bioruby-statsample-glm]
|
6
|
+
|
7
|
+
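Statsample-GLM is an extension to Statsample, a suite of advanced statistics in Ruby. It includes modules for regression techniques such as Poisson Regression and Logistic Regression.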
|
8
|
+
|
9
|
+
Note: this software is under active development!
|
10
|
+
|
11
|
+
== Installation
|
12
|
+
|
13
|
+
gem install bio-statsample-glm
|
14
|
+
|
15
|
+
== Usage
|
16
|
+
|
17
|
+
== Developers
|
18
|
+
|
19
|
+
To use the library
|
20
|
+
|
21
|
+
require 'bio-statsample-glm'
|
22
|
+
|
23
|
+
The API doc is online. For more code examples see also the test files in
|
24
|
+
the source tree.
|
25
|
+
|
26
|
+
== Project home page
|
27
|
+
|
28
|
+
For information on the source tree, documentation, issues and how to contribute, see
|
29
|
+
|
30
|
+
http://github.com/ankurgel/bioruby-statsample-glm
|
31
|
+
|
32
|
+
The BioRuby community is on IRC server: irc.freenode.org, channel: #bioruby.
|
33
|
+
|
34
|
+
== Cite
|
35
|
+
|
36
|
+
If you use this software, please cite one of
|
37
|
+
|
38
|
+
* {BioRuby: bioinformatics software for the Ruby programming language}[http://dx.doi.org/10.1093/bioinformatics/btq475]
|
39
|
+
* {Biogem: an effective tool-based approach for scaling up open source software development in bioinformatics}[http://dx.doi.org/10.1093/bioinformatics/bts080]
|
40
|
+
|
41
|
+
== Biogems.info
|
42
|
+
|
43
|
+
This Biogem is published at http://biogems.info/index.html#bio-statsample-glm
|
44
|
+
|
45
|
+
== Copyright
|
46
|
+
|
47
|
+
Copyright (c) 2013 Ankur Goel. See LICENSE.txt for further details.
|
48
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bundler'
|
5
|
+
begin
|
6
|
+
Bundler.setup(:default, :development)
|
7
|
+
rescue Bundler::BundlerError => e
|
8
|
+
$stderr.puts e.message
|
9
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
10
|
+
exit e.status_code
|
11
|
+
end
|
12
|
+
require 'rake'
|
13
|
+
|
14
|
+
require 'jeweler'
|
15
|
+
Jeweler::Tasks.new do |gem|
|
16
|
+
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
17
|
+
gem.name = "bio-statsample-glm"
|
18
|
+
gem.homepage = "http://github.com/AnkurGel/bioruby-statsample-glm"
|
19
|
+
gem.license = "MIT"
|
20
|
+
gem.summary = %Q{Generalized Linear Models for Statsample}
|
21
|
+
gem.description = %Q{Statsample-GLM is an extension to Statsample, an advance statistics suite in Ruby. This gem includes modules for Regression techniques such as Poisson Regression, Logistic Regression and Exponential Regression}
|
22
|
+
gem.email = "ankurgel@gmail.com"
|
23
|
+
gem.authors = ["Ankur Goel"]
|
24
|
+
# dependencies defined in Gemfile
|
25
|
+
end
|
26
|
+
Jeweler::RubygemsDotOrgTasks.new
|
27
|
+
|
28
|
+
require 'rake/testtask'
|
29
|
+
Rake::TestTask.new(:test) do |test|
|
30
|
+
test.libs << 'lib' << 'test'
|
31
|
+
test.pattern = 'test/**/test_*.rb'
|
32
|
+
test.verbose = true
|
33
|
+
end
|
34
|
+
|
35
|
+
require 'cucumber/rake/task'
|
36
|
+
Cucumber::Rake::Task.new(:features)
|
37
|
+
|
38
|
+
task :default => :test
|
39
|
+
|
40
|
+
require 'rdoc/task'
|
41
|
+
Rake::RDocTask.new do |rdoc|
|
42
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
43
|
+
|
44
|
+
rdoc.rdoc_dir = 'rdoc'
|
45
|
+
rdoc.title = "bio-statsample-glm #{version}"
|
46
|
+
rdoc.rdoc_files.include('README*')
|
47
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
48
|
+
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
File without changes
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'bundler'
|
2
|
+
begin
|
3
|
+
Bundler.setup(:default, :development)
|
4
|
+
rescue Bundler::BundlerError => e
|
5
|
+
$stderr.puts e.message
|
6
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
7
|
+
exit e.status_code
|
8
|
+
end
|
9
|
+
|
10
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__) + '/../../lib')
|
11
|
+
require 'bio-statsample-glm'
|
12
|
+
|
13
|
+
require 'test/unit/assertions'
|
14
|
+
|
15
|
+
World(Test::Unit::Assertions)
|
@@ -0,0 +1,12 @@
|
|
1
|
+
# Please require your code below, respecting the naming conventions in the
|
2
|
+
# bioruby directory tree.
|
3
|
+
#
|
4
|
+
# For example, say you have a plugin named bio-plugin, the only uncommented
|
5
|
+
# line in this file would be
|
6
|
+
#
|
7
|
+
# require 'bio/bio-plugin/plugin'
|
8
|
+
#
|
9
|
+
# In this file only require other files. Avoid other source code.
|
10
|
+
|
11
|
+
require 'statsample'
|
12
|
+
require 'bio-statsample-glm/regression'
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'bio-statsample-glm/regression/poisson'
|
2
|
+
require 'bio-statsample-glm/regression/logistic'
|
3
|
+
module Statsample
|
4
|
+
module Regression
|
5
|
+
include Statsample::VectorShorthands
|
6
|
+
|
7
|
+
# = Generalized linear models
|
8
|
+
# == Parameters
|
9
|
+
#
|
10
|
+
# * x = model matrix
|
11
|
+
# * y = response vector
|
12
|
+
# * method = symbol; choice of glm strategy, default = :poisson
|
13
|
+
#
|
14
|
+
# == Usage
|
15
|
+
# require 'bio-statsample-glm'
|
16
|
+
# x1=Statsample::Vector.new([0.537322309644812,-0.717124209978434,-0.519166718891331,0.434970973986765,-0.761822002215759,1.51170030921189,0.883854199811195,-0.908689798854196,1.70331977539793,-0.246971150634099,-1.59077593922623,-0.721548040910253,0.467025703920194,-0.510132788447137,0.430106510266798,-0.144353683251536,-1.54943800728303,0.849307651309298,-0.640304240933579,1.31462478279425,-0.399783455165345,0.0453055645017902,-2.58212161987746,-1.16484414309359,-1.08829266466281,-0.243893919684792,-1.96655661929441,0.301335373291024,-0.665832694463588,-0.0120650855753837,1.5116066367604,0.557300353673344,1.12829931872045,0.234443748015922,-2.03486690662651,0.275544751380246,-0.231465849558696,-0.356880153225012,-0.57746647541923,1.35758352580655,1.23971669378224,-0.662466275100489,0.313263561921793,-1.08783223256362,1.41964722846899,1.29325100940785,0.72153880625103,0.440580131022748,0.0351917814720056, -0.142353224879252],:scale)
|
17
|
+
# x2=Statsample::Vector.new([-0.866655707911859,-0.367820249977585,0.361486610435,0.857332626245179,0.133438466268095,0.716104533073575,1.77206093023382,-0.10136697295802,-0.777086491435508,-0.204573554913706,0.963353531412233,-1.10103024900542,-0.404372761837392,-0.230226345183469,0.0363730246866971,-0.838265540390497,1.12543549657924,-0.57929175648001,-0.747060244805248,0.58946979365152,-0.531952663697324,1.53338594419818,0.521992029051441,1.41631763288724,0.611402316795129,-0.518355638373296,-0.515192557101107,-0.672697937866108,1.84347042325327,-0.21195540664804,-0.269869371631611,0.296155694010096,-2.18097898069634,-1.21314663927206,1.49193669881581,1.38969280369493,-0.400680808117106,-1.87282814976479,1.82394870451051,0.637864732838274,-0.141155946382493,0.0699950644281617,1.32568550595165,-0.412599258349398,0.14436832227506,-1.16507785388489,-2.16782049922428,0.24318371493798,0.258954871320764,-0.151966534521183],:scale)
|
18
|
+
# y=Statsample::Vector.new([0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1],:scale)
|
19
|
+
# x=Statsample::Dataset.new({"i"=>intercept,"x1"=>x1,"x2"=>x2})
|
20
|
+
# obj = Statsample::Regression.glm(x.to_matrix, y, :binomial)
|
21
|
+
# #=> Logistic Regression object
|
22
|
+
#
|
23
|
+
# == Returns
|
24
|
+
# GLM object for given method.
|
25
|
+
# Builds the GLM object for the requested family.
#
# * x      = model matrix
# * y      = response vector
# * method = Symbol or String naming the family (case-insensitive):
#            :poisson (default) or :binomial
#
# Returns a Poisson or Logistic regression object; call #irwls on it to
# fit the model.
# Raises ArgumentError for any other method instead of returning nil,
# so a typo fails here rather than later as a NoMethodError on nil.
def self.glm(x, y, method=:poisson)
  case method.downcase.to_sym
  when :poisson
    Statsample::Regression::GLM::Poisson.new(x, y)
  when :binomial
    Statsample::Regression::GLM::Logistic.new(x, y)
  else
    raise ArgumentError, "Unknown GLM method: #{method.inspect} (expected :poisson or :binomial)"
  end
end
|
34
|
+
|
35
|
+
|
36
|
+
# Fits a GLM by iterating Newton steps on the coefficient vector until the
# summed absolute change in the coefficients drops below +epsilon+ or
# +max_iter+ steps have been taken.
#
# == Parameters
#
# * x        = model matrix
# * y        = response vector
# * mu       = callable (x, b) returning the fitted means as a matrix
# * w        = callable (x, b) for the weight matrix; accepted for interface
#              compatibility but not called here (j is expected to use it)
# * j        = callable (x, b) returning the matrix that is inverted for
#              each step
# * h        = callable (x, b, y) returning the column vector that the
#              inverse of j is applied to
# * epsilon  = convergence tolerance
# * max_iter = maximum number of iterations
#
# == Returns
# Array of [coefficients, standard errors, fitted values, residuals,
# residual degrees of freedom, iterations performed, converged flag].
# The order matches the destructuring done by the GLM classes' #irwls
# (..., @df, @iter, @converged).
def self.irwls(x, y, mu, w, j, h, epsilon = 1e-7, max_iter = 100)
  b = Matrix.column_vector(Array.new(x.column_size, 0.0))
  converged = false
  iterations = 0

  1.upto(max_iter) do |i|
    iterations = i
    # conversion from : (solve(j(x,b)) %*% h(x,b,y))
    b_new = b - (j.call(x, b).inverse * h.call(x, b, y))
    converged = (b_new - b).map(&:abs).to_a.flatten.inject(:+) < epsilon
    b = b_new
    break if converged
  end

  # Standard errors: square roots of the negated diagonal of the inverse of j.
  # The diagonal is read by index so only stdlib Matrix methods are needed.
  inverse = j.call(x, b).inverse
  ss = (0...inverse.row_size).map { |k| Math.sqrt(-inverse[k, k]) }

  values = mu.call(x, b)
  residuals = y - values.column_vectors.map(&:to_a).flatten
  df_residuals = y.count - x.column_size

  [create_vector(b.column_vectors[0]), create_vector(ss), create_vector(values.to_a.flatten),
   residuals, df_residuals, iterations, converged]
end
|
60
|
+
|
61
|
+
private
|
62
|
+
def self.create_vector(arr)
|
63
|
+
Statsample::Vector.new(arr, :scale)
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
module Statsample
|
2
|
+
module Regression
|
3
|
+
module GLM
|
4
|
+
|
5
|
+
class Logistic
|
6
|
+
|
7
|
+
# a named vector of coefficients
|
8
|
+
attr_reader :coefficients
|
9
|
+
# standard errors of the coefficient estimates
|
10
|
+
attr_reader :se
|
11
|
+
# The fitted mean values
|
12
|
+
attr_reader :fit
|
13
|
+
# the _working_ residuals; that is the residuals in the final iteration of the IRWLS fit.
|
14
|
+
attr_reader :residuals
|
15
|
+
# The residuals degree of freedom
|
16
|
+
attr_reader :df
|
17
|
+
# Number of iterations used for convergence
|
18
|
+
attr_reader :iter
|
19
|
+
# Boolean. Tells whether the IRWLS for the given model converged or not
|
20
|
+
attr_reader :converged
|
21
|
+
|
22
|
+
def initialize(x, y)
|
23
|
+
@x = x
|
24
|
+
@y = y
|
25
|
+
end
|
26
|
+
|
27
|
+
# Mean function of the logistic model: applies exp(eta) / (1 + exp(eta))
# to every element of the linear predictor x * b.
#
# * x = model matrix
# * b = column matrix of coefficients
#
# Returns a new matrix with the same shape as x * b.
def self.mu(x, b)
  # 1 / (1 + e^-eta) is algebraically the same as e^eta / (1 + e^eta) but
  # does not degrade to Infinity / Infinity (NaN) when eta is large.
  # Building the result with map also avoids mutating a matrix in place.
  (x * b).map { |eta| 1.0 / (1.0 + Math.exp(-eta)) }
end
|
36
|
+
|
37
|
+
def self.w(x, b)
|
38
|
+
mus = mu(x,b).column_vectors.map(&:to_a).flatten
|
39
|
+
mus_intermediate = mus.collect { |x| 1 - x }
|
40
|
+
w = mus.zip(mus_intermediate).collect { |x| x.inject(:*) }
|
41
|
+
w_mat = Matrix.I(w.size)
|
42
|
+
w_enum = w.to_enum
|
43
|
+
return w_mat.map do |x|
|
44
|
+
x.eql?(1) ? w_enum.next : x
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
def self.h(x,b,y)
|
49
|
+
x_t = x.transpose
|
50
|
+
mu_flat = mu(x,b).column_vectors.map(&:to_a).flatten
|
51
|
+
column_data = y.zip(mu_flat).collect { |x| x.inject(:-) }
|
52
|
+
x_t * Matrix.column_vector(column_data)
|
53
|
+
end
|
54
|
+
|
55
|
+
def self.j(x,b)
|
56
|
+
w_matrix = w(x, b)
|
57
|
+
jacobian_matrix = x.transpose * w_matrix * x
|
58
|
+
jacobian_matrix.map { |x| -x }
|
59
|
+
end
|
60
|
+
|
61
|
+
# Human-readable name of the model, including its fully qualified class.
def to_s
  "Logistic Regression (Statsample::Regression::GLM::Logistic)"
end
|
64
|
+
|
65
|
+
# = Iteratively reweighted least squares
|
66
|
+
# Computes irwls for given model and parameters.
|
67
|
+
#
|
68
|
+
# == Usage
|
69
|
+
# require 'bio-statsample-glm'
|
70
|
+
# x1=Statsample::Vector.new([0.537322309644812,-0.717124209978434,-0.519166718891331,0.434970973986765,-0.761822002215759,1.51170030921189,0.883854199811195,-0.908689798854196,1.70331977539793,-0.246971150634099,-1.59077593922623,-0.721548040910253,0.467025703920194,-0.510132788447137,0.430106510266798,-0.144353683251536,-1.54943800728303,0.849307651309298,-0.640304240933579,1.31462478279425,-0.399783455165345,0.0453055645017902,-2.58212161987746,-1.16484414309359,-1.08829266466281,-0.243893919684792,-1.96655661929441,0.301335373291024,-0.665832694463588,-0.0120650855753837,1.5116066367604,0.557300353673344,1.12829931872045,0.234443748015922,-2.03486690662651,0.275544751380246,-0.231465849558696,-0.356880153225012,-0.57746647541923,1.35758352580655,1.23971669378224,-0.662466275100489,0.313263561921793,-1.08783223256362,1.41964722846899,1.29325100940785,0.72153880625103,0.440580131022748,0.0351917814720056, -0.142353224879252],:scale)
|
71
|
+
# x2=Statsample::Vector.new([-0.866655707911859,-0.367820249977585,0.361486610435,0.857332626245179,0.133438466268095,0.716104533073575,1.77206093023382,-0.10136697295802,-0.777086491435508,-0.204573554913706,0.963353531412233,-1.10103024900542,-0.404372761837392,-0.230226345183469,0.0363730246866971,-0.838265540390497,1.12543549657924,-0.57929175648001,-0.747060244805248,0.58946979365152,-0.531952663697324,1.53338594419818,0.521992029051441,1.41631763288724,0.611402316795129,-0.518355638373296,-0.515192557101107,-0.672697937866108,1.84347042325327,-0.21195540664804,-0.269869371631611,0.296155694010096,-2.18097898069634,-1.21314663927206,1.49193669881581,1.38969280369493,-0.400680808117106,-1.87282814976479,1.82394870451051,0.637864732838274,-0.141155946382493,0.0699950644281617,1.32568550595165,-0.412599258349398,0.14436832227506,-1.16507785388489,-2.16782049922428,0.24318371493798,0.258954871320764,-0.151966534521183],:scale)
|
72
|
+
# y=Statsample::Vector.new([0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1],:scale)
|
73
|
+
# x=Statsample::Dataset.new({"i"=>intercept,"x1"=>x1,"x2"=>x2})
|
74
|
+
# obj = Statsample::Regression.glm(x.to_matrix, y, :binomial)
|
75
|
+
# #=> Logistic Regression object
|
76
|
+
# obj.irwls
|
77
|
+
# #=> Array of returned values
|
78
|
+
# obj.coefficients
|
79
|
+
# #=> named vector of coefficients
|
80
|
+
|
81
|
+
def irwls
|
82
|
+
x, y = @x, @y
|
83
|
+
#calling irwls on Regression and passing equivalent methods in lambdas.
|
84
|
+
#Ruby_level+=awesome!
|
85
|
+
@coefficients, @se, @fit, @residuals, @df, @iter, @converged = Statsample::Regression.irwls(
|
86
|
+
x,y, ->l,m{self.class.mu(l,m)}, ->l,m{self.class.w(l,m)},
|
87
|
+
->l,m{self.class.j(l,m)}, ->k,l,m{self.class.h(k,l,m)}
|
88
|
+
)
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
module Statsample
|
2
|
+
module Regression
|
3
|
+
module GLM
|
4
|
+
|
5
|
+
class Poisson
|
6
|
+
|
7
|
+
# a named vector of coefficients
|
8
|
+
attr_reader :coefficients
|
9
|
+
# standard errors of the coefficient estimates
|
10
|
+
attr_reader :se
|
11
|
+
# The fitted mean values
|
12
|
+
attr_reader :fit
|
13
|
+
# the _working_ residuals; that is the residuals in the final iteration of the IRWLS fit.
|
14
|
+
attr_reader :residuals
|
15
|
+
# The residuals degree of freedom
|
16
|
+
attr_reader :df
|
17
|
+
# Number of iterations used for convergence
|
18
|
+
attr_reader :iter
|
19
|
+
# Boolean. Tells whether the IRWLS for the given model converged or not
|
20
|
+
attr_reader :converged
|
21
|
+
|
22
|
+
def initialize(x, y)
|
23
|
+
@x = x
|
24
|
+
@y = y
|
25
|
+
end
|
26
|
+
|
27
|
+
def self.mu(x, b, link=:log)
|
28
|
+
if link.downcase.to_sym == :log
|
29
|
+
(x * b).map { |y| Math.exp(y) }
|
30
|
+
elsif link.downcase.to_sym == :sqrt
|
31
|
+
(x * b).collect { |y| y**2 }
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
def self.w(x, b)
|
36
|
+
poisson_mu = mu(x,b)
|
37
|
+
mu_flat = poisson_mu.column_vectors.map(&:to_a).flatten
|
38
|
+
|
39
|
+
w_mat = Matrix.I(mu_flat.size)
|
40
|
+
mu_enum = mu_flat.to_enum
|
41
|
+
return w_mat.map do |x|
|
42
|
+
x.eql?(1) ? mu_enum.next : x
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
def self.h(x, b, y)
|
47
|
+
x_t = x.transpose
|
48
|
+
mu_flat = mu(x,b).column_vectors.map(&:to_a).flatten
|
49
|
+
column_data = y.zip(mu_flat).collect { |x| x.inject(:-) }
|
50
|
+
x_t * Matrix.columns([column_data])
|
51
|
+
end
|
52
|
+
|
53
|
+
def self.j(x, b)
|
54
|
+
w_matrix = w(x, b)
|
55
|
+
jacobian_matrix = x.transpose * w_matrix * x
|
56
|
+
jacobian_matrix.map { |x| -x }
|
57
|
+
end
|
58
|
+
|
59
|
+
# Human-readable name of the model, including its fully qualified class.
def to_s
  "Poisson Regression (Statsample::Regression::GLM::Poisson)"
end
|
62
|
+
|
63
|
+
def irwls
|
64
|
+
x,y = @x,@y
|
65
|
+
#calling irwls on Regression and passing equivalent methods in lambdas.
|
66
|
+
#Ruby_level+=awesome!
|
67
|
+
@coefficients, @se, @fit, @residuals, @df, @iter, @converged = Statsample::Regression.irwls(
|
68
|
+
x,y, ->l,m{self.class.mu(l,m)}, ->l,m{self.class.w(l,m)},
|
69
|
+
->l,m{self.class.j(l,m)}, ->k,l,m{self.class.h(k,l,m)}
|
70
|
+
)
|
71
|
+
end
|
72
|
+
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
data/test/helper.rb
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler'
|
3
|
+
begin
|
4
|
+
Bundler.setup(:default, :development)
|
5
|
+
rescue Bundler::BundlerError => e
|
6
|
+
$stderr.puts e.message
|
7
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
8
|
+
exit e.status_code
|
9
|
+
end
|
10
|
+
require 'minitest/unit'
|
11
|
+
require 'shoulda'
|
12
|
+
require 'shoulda-context'
|
13
|
+
require 'mocha/setup'
|
14
|
+
|
15
|
+
|
16
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
17
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
18
|
+
require 'bio-statsample-glm'
|
19
|
+
module MiniTest
|
20
|
+
class Unit
|
21
|
+
class TestCase
|
22
|
+
include Shoulda::Context::Assertions
|
23
|
+
include Shoulda::Context::InstanceMethods
|
24
|
+
extend Shoulda::Context::ClassMethods
|
25
|
+
def self.should_with_gsl(name,&block)
|
26
|
+
should(name) do
|
27
|
+
if Statsample.has_gsl?
|
28
|
+
instance_eval(&block)
|
29
|
+
else
|
30
|
+
skip("Requires GSL")
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
module Assertions
|
38
|
+
def assert_similar_vector(exp, obs, delta=1e-10,msg=nil)
|
39
|
+
msg||="Different vectors #{exp} - #{obs}"
|
40
|
+
assert_equal(exp.size, obs.size)
|
41
|
+
exp.data_with_nils.each_with_index {|v,i|
|
42
|
+
assert_in_delta(v,obs[i],delta)
|
43
|
+
}
|
44
|
+
end
|
45
|
+
def assert_equal_vector(exp,obs,delta=1e-10,msg=nil)
|
46
|
+
assert_equal(exp.size, obs.size, "Different size.#{msg}")
|
47
|
+
exp.size.times {|i|
|
48
|
+
assert_in_delta(exp[i],obs[i],delta, "Different element #{i}. \nExpected:\n#{exp}\nObserved:\n#{obs}.#{msg}")
|
49
|
+
}
|
50
|
+
end
|
51
|
+
def assert_equal_matrix(exp,obs,delta=1e-10,msg=nil)
|
52
|
+
assert_equal(exp.row_size, obs.row_size, "Different row size.#{msg}")
|
53
|
+
assert_equal(exp.column_size, obs.column_size, "Different column size.#{msg}")
|
54
|
+
exp.row_size.times {|i|
|
55
|
+
exp.column_size.times {|j|
|
56
|
+
assert_in_delta(exp[i,j],obs[i,j], delta, "Different element #{i},#{j}\nExpected:\n#{exp}\nObserved:\n#{obs}.#{msg}")
|
57
|
+
}
|
58
|
+
}
|
59
|
+
end
|
60
|
+
alias :assert_raise :assert_raises unless method_defined? :assert_raise
|
61
|
+
alias :assert_not_equal :refute_equal unless method_defined? :assert_not_equal
|
62
|
+
alias :assert_not_same :refute_same unless method_defined? :assert_not_same
|
63
|
+
unless method_defined? :assert_nothing_raised
|
64
|
+
def assert_nothing_raised(msg=nil)
|
65
|
+
msg||="Nothing should be raised, but raised %s"
|
66
|
+
begin
|
67
|
+
yield
|
68
|
+
not_raised=true
|
69
|
+
rescue Exception => e
|
70
|
+
not_raised=false
|
71
|
+
msg=sprintf(msg,e)
|
72
|
+
end
|
73
|
+
assert(not_raised,msg)
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
MiniTest::Unit.autorun
|
data/test/test_glm.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
require(File.expand_path(File.dirname(__FILE__)+'/helper.rb'))
|
2
|
+
|
3
|
+
class StatsampleRegressionGlm < MiniTest::Unit::TestCase
|
4
|
+
|
5
|
+
context("Example") do
|
6
|
+
setup do
|
7
|
+
x1=Statsample::Vector.new([0.537322309644812,-0.717124209978434,-0.519166718891331,0.434970973986765,-0.761822002215759,1.51170030921189,0.883854199811195,-0.908689798854196,1.70331977539793,-0.246971150634099,-1.59077593922623,-0.721548040910253,0.467025703920194,-0.510132788447137,0.430106510266798,-0.144353683251536,-1.54943800728303,0.849307651309298,-0.640304240933579,1.31462478279425,-0.399783455165345,0.0453055645017902,-2.58212161987746,-1.16484414309359,-1.08829266466281,-0.243893919684792,-1.96655661929441,0.301335373291024,-0.665832694463588,-0.0120650855753837,1.5116066367604,0.557300353673344,1.12829931872045,0.234443748015922,-2.03486690662651,0.275544751380246,-0.231465849558696,-0.356880153225012,-0.57746647541923,1.35758352580655,1.23971669378224,-0.662466275100489,0.313263561921793,-1.08783223256362,1.41964722846899,1.29325100940785,0.72153880625103,0.440580131022748,0.0351917814720056, -0.142353224879252],:scale)
|
8
|
+
x2=Statsample::Vector.new([-0.866655707911859,-0.367820249977585,0.361486610435,0.857332626245179,0.133438466268095,0.716104533073575,1.77206093023382,-0.10136697295802,-0.777086491435508,-0.204573554913706,0.963353531412233,-1.10103024900542,-0.404372761837392,-0.230226345183469,0.0363730246866971,-0.838265540390497,1.12543549657924,-0.57929175648001,-0.747060244805248,0.58946979365152,-0.531952663697324,1.53338594419818,0.521992029051441,1.41631763288724,0.611402316795129,-0.518355638373296,-0.515192557101107,-0.672697937866108,1.84347042325327,-0.21195540664804,-0.269869371631611,0.296155694010096,-2.18097898069634,-1.21314663927206,1.49193669881581,1.38969280369493,-0.400680808117106,-1.87282814976479,1.82394870451051,0.637864732838274,-0.141155946382493,0.0699950644281617,1.32568550595165,-0.412599258349398,0.14436832227506,-1.16507785388489,-2.16782049922428,0.24318371493798,0.258954871320764,-0.151966534521183],:scale)
|
9
|
+
@y_log=Statsample::Vector.new([0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1],:scale)
|
10
|
+
@y_pois=Statsample::Vector.new([1,2,1,3,3,1,10,1,1,2,15,0,0,2,1,2,18,2,1,1,1,8,18,13,7,1,1,0,26,0,2,2,0,0,25,7,0,0,21,0,0,1,5,0,3,0,0,1,0,0],:scale)
|
11
|
+
intercept=Statsample::Vector.new([1]*50,:scale)
|
12
|
+
@df=Statsample::Dataset.new({"i"=>intercept,"x1"=>x1,"x2"=>x2})
|
13
|
+
end
|
14
|
+
|
15
|
+
context("Logistic") do
|
16
|
+
setup do
|
17
|
+
@glm=Statsample::Regression.glm(@df.to_matrix,@y_log,:binomial)
|
18
|
+
@glm.irwls
|
19
|
+
end
|
20
|
+
|
21
|
+
should "report correct coefficientes" do
|
22
|
+
assert_similar_vector(@glm.coefficients,[0.675603176233325,-0.312493754568903,2.28671333346264])
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
context("Poisson") do
|
27
|
+
setup do
|
28
|
+
@glm=Statsample::Regression.glm(@df.to_matrix,@y_pois,:poisson)
|
29
|
+
@glm.irwls
|
30
|
+
end
|
31
|
+
|
32
|
+
should "report correct coefficientes" do
|
33
|
+
assert_similar_vector(@glm.coefficients,[0.32993246633711,-0.586359358356708,1.28511323439258])
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
metadata
ADDED
@@ -0,0 +1,246 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: bio-statsample-glm
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Ankur Goel
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-09-23 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: statsample
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 1.2.0
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 1.2.0
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: activesupport
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - '='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: 3.2.10
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - '='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: 3.2.10
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: shoulda
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: rdoc
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ~>
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '3.12'
|
70
|
+
type: :development
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ~>
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '3.12'
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: minitest
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ~>
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: 4.7.5
|
86
|
+
type: :development
|
87
|
+
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ~>
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: 4.7.5
|
94
|
+
- !ruby/object:Gem::Dependency
|
95
|
+
name: cucumber
|
96
|
+
requirement: !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - ! '>='
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
type: :development
|
103
|
+
prerelease: false
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ! '>='
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0'
|
110
|
+
- !ruby/object:Gem::Dependency
|
111
|
+
name: bundler
|
112
|
+
requirement: !ruby/object:Gem::Requirement
|
113
|
+
none: false
|
114
|
+
requirements:
|
115
|
+
- - ~>
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: 1.3.5
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
none: false
|
122
|
+
requirements:
|
123
|
+
- - ~>
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: 1.3.5
|
126
|
+
- !ruby/object:Gem::Dependency
|
127
|
+
name: jeweler
|
128
|
+
requirement: !ruby/object:Gem::Requirement
|
129
|
+
none: false
|
130
|
+
requirements:
|
131
|
+
- - ~>
|
132
|
+
- !ruby/object:Gem::Version
|
133
|
+
version: 1.8.4
|
134
|
+
type: :development
|
135
|
+
prerelease: false
|
136
|
+
version_requirements: !ruby/object:Gem::Requirement
|
137
|
+
none: false
|
138
|
+
requirements:
|
139
|
+
- - ~>
|
140
|
+
- !ruby/object:Gem::Version
|
141
|
+
version: 1.8.4
|
142
|
+
- !ruby/object:Gem::Dependency
|
143
|
+
name: bio
|
144
|
+
requirement: !ruby/object:Gem::Requirement
|
145
|
+
none: false
|
146
|
+
requirements:
|
147
|
+
- - ! '>='
|
148
|
+
- !ruby/object:Gem::Version
|
149
|
+
version: 1.4.2
|
150
|
+
type: :development
|
151
|
+
prerelease: false
|
152
|
+
version_requirements: !ruby/object:Gem::Requirement
|
153
|
+
none: false
|
154
|
+
requirements:
|
155
|
+
- - ! '>='
|
156
|
+
- !ruby/object:Gem::Version
|
157
|
+
version: 1.4.2
|
158
|
+
- !ruby/object:Gem::Dependency
|
159
|
+
name: rdoc
|
160
|
+
requirement: !ruby/object:Gem::Requirement
|
161
|
+
none: false
|
162
|
+
requirements:
|
163
|
+
- - ~>
|
164
|
+
- !ruby/object:Gem::Version
|
165
|
+
version: '3.12'
|
166
|
+
type: :development
|
167
|
+
prerelease: false
|
168
|
+
version_requirements: !ruby/object:Gem::Requirement
|
169
|
+
none: false
|
170
|
+
requirements:
|
171
|
+
- - ~>
|
172
|
+
- !ruby/object:Gem::Version
|
173
|
+
version: '3.12'
|
174
|
+
- !ruby/object:Gem::Dependency
|
175
|
+
name: mocha
|
176
|
+
requirement: !ruby/object:Gem::Requirement
|
177
|
+
none: false
|
178
|
+
requirements:
|
179
|
+
- - ~>
|
180
|
+
- !ruby/object:Gem::Version
|
181
|
+
version: 0.14.0
|
182
|
+
type: :development
|
183
|
+
prerelease: false
|
184
|
+
version_requirements: !ruby/object:Gem::Requirement
|
185
|
+
none: false
|
186
|
+
requirements:
|
187
|
+
- - ~>
|
188
|
+
- !ruby/object:Gem::Version
|
189
|
+
version: 0.14.0
|
190
|
+
description: Statsample-GLM is an extension to Statsample, an advance statistics suite
|
191
|
+
in Ruby. This gem includes modules for Regression techniques such as Poisson Regression,
|
192
|
+
Logistic Regression and Exponential Regression
|
193
|
+
email: ankurgel@gmail.com
|
194
|
+
executables: []
|
195
|
+
extensions: []
|
196
|
+
extra_rdoc_files:
|
197
|
+
- LICENSE.txt
|
198
|
+
- README.md
|
199
|
+
- README.rdoc
|
200
|
+
files:
|
201
|
+
- .document
|
202
|
+
- .travis.yml
|
203
|
+
- Gemfile
|
204
|
+
- LICENSE.txt
|
205
|
+
- README.md
|
206
|
+
- README.rdoc
|
207
|
+
- Rakefile
|
208
|
+
- VERSION
|
209
|
+
- features/bio-statsample-glm.feature
|
210
|
+
- features/step_definitions/bio-statsample-glm_steps.rb
|
211
|
+
- features/support/env.rb
|
212
|
+
- lib/bio-statsample-glm.rb
|
213
|
+
- lib/bio-statsample-glm/regression.rb
|
214
|
+
- lib/bio-statsample-glm/regression/logistic.rb
|
215
|
+
- lib/bio-statsample-glm/regression/poisson.rb
|
216
|
+
- test/helper.rb
|
217
|
+
- test/test_glm.rb
|
218
|
+
homepage: http://github.com/AnkurGel/bioruby-statsample-glm
|
219
|
+
licenses:
|
220
|
+
- MIT
|
221
|
+
post_install_message:
|
222
|
+
rdoc_options: []
|
223
|
+
require_paths:
|
224
|
+
- lib
|
225
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
226
|
+
none: false
|
227
|
+
requirements:
|
228
|
+
- - ! '>='
|
229
|
+
- !ruby/object:Gem::Version
|
230
|
+
version: '0'
|
231
|
+
segments:
|
232
|
+
- 0
|
233
|
+
hash: 797002845
|
234
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
235
|
+
none: false
|
236
|
+
requirements:
|
237
|
+
- - ! '>='
|
238
|
+
- !ruby/object:Gem::Version
|
239
|
+
version: '0'
|
240
|
+
requirements: []
|
241
|
+
rubyforge_project:
|
242
|
+
rubygems_version: 1.8.25
|
243
|
+
signing_key:
|
244
|
+
specification_version: 3
|
245
|
+
summary: Generalized Linear Models for Statsample
|
246
|
+
test_files: []
|