bio-statsample-glm 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.travis.yml +13 -0
- data/Gemfile +20 -0
- data/LICENSE.txt +20 -0
- data/README.md +51 -0
- data/README.rdoc +48 -0
- data/Rakefile +48 -0
- data/VERSION +1 -0
- data/features/bio-statsample-glm.feature +9 -0
- data/features/step_definitions/bio-statsample-glm_steps.rb +0 -0
- data/features/support/env.rb +15 -0
- data/lib/bio-statsample-glm.rb +12 -0
- data/lib/bio-statsample-glm/regression.rb +66 -0
- data/lib/bio-statsample-glm/regression/logistic.rb +94 -0
- data/lib/bio-statsample-glm/regression/poisson.rb +76 -0
- data/test/helper.rb +79 -0
- data/test/test_glm.rb +37 -0
- metadata +246 -0
data/.document
ADDED
data/.travis.yml
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
language: ruby
|
2
|
+
rvm:
|
3
|
+
- 1.9.2
|
4
|
+
- 1.9.3
|
5
|
+
- 2.0.0
|
6
|
+
- jruby-19mode # JRuby in 1.9 mode
|
7
|
+
- rbx-19mode
|
8
|
+
# - 1.8.7
|
9
|
+
# - jruby-18mode # JRuby in 1.8 mode
|
10
|
+
# - rbx-18mode
|
11
|
+
|
12
|
+
# uncomment this line if your project needs to run something other than `rake`:
|
13
|
+
# script: bundle exec rspec spec
|
data/Gemfile
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# Fetch gems over TLS; plain http sources are flagged as insecure by Bundler.
source "https://rubygems.org"

gem 'statsample', '>=1.2.0'
# Add dependencies required to use your gem here.
# Example:
gem "activesupport", "= 3.2.10"

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
  gem "shoulda", ">= 0"
  # rdoc is declared once; a second identical entry makes Bundler warn
  # that the gem is listed more than once.
  gem "rdoc", "~> 3.12"
  gem "minitest", "~> 4.7.5"
  gem "cucumber", ">= 0"
  gem "bundler", "~> 1.3.5"
  gem "jeweler", "~> 1.8.4"
  gem "bio", ">= 1.4.2"
  gem "mocha", "~> 0.14.0"
end
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2013 Ankur Goel
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
# bio-statsample-glm
|
2
|
+
|
3
|
+
[![Build Status](https://secure.travis-ci.org/AnkurGel/bioruby-statsample-glm.png)](http://travis-ci.org/AnkurGel/bioruby-statsample-glm)
|
4
|
+
|
5
|
+
Statsample-GLM is an extension to [Statsample](https://github.com/clbustos/statsample), a suite of advanced statistics in Ruby. It includes modules for various regression techniques such as:
|
6
|
+
|
7
|
+
* Poisson Regression
|
8
|
+
* Logistic Regression
|
9
|
+
* Exponential Regression
|
10
|
+
|
11
|
+
Note: this software is under active development!
|
12
|
+
|
13
|
+
## Installation
|
14
|
+
|
15
|
+
```sh
|
16
|
+
gem install bio-statsample-glm
|
17
|
+
```
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
```ruby
|
22
|
+
require 'bio-statsample-glm'
|
23
|
+
```
|
24
|
+
|
25
|
+
The API doc is online. For more code examples see the test files in
|
26
|
+
the source tree.
|
27
|
+
|
28
|
+
## Project home page
|
29
|
+
|
30
|
+
For information on the source tree, documentation, examples, issues and
|
31
|
+
how to contribute, see
|
32
|
+
|
33
|
+
http://github.com/AnkurGel/bioruby-statsample-glm
|
34
|
+
|
35
|
+
The BioRuby community is on IRC server: irc.freenode.org, channel: #bioruby.
|
36
|
+
|
37
|
+
## Cite
|
38
|
+
|
39
|
+
If you use this software, please cite one of
|
40
|
+
|
41
|
+
* [BioRuby: bioinformatics software for the Ruby programming language](http://dx.doi.org/10.1093/bioinformatics/btq475)
|
42
|
+
* [Biogem: an effective tool-based approach for scaling up open source software development in bioinformatics](http://dx.doi.org/10.1093/bioinformatics/bts080)
|
43
|
+
|
44
|
+
## Biogems.info
|
45
|
+
|
46
|
+
This Biogem is published at [biogems.info](http://biogems.info/index.html#bio-statsample-glm)
|
47
|
+
|
48
|
+
## Copyright
|
49
|
+
|
50
|
+
Copyright (c) 2013 Ankur Goel. See LICENSE.txt for further details.
|
51
|
+
|
data/README.rdoc
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
= bio-statsample-glm
|
2
|
+
|
3
|
+
{<img
|
4
|
+
src="https://secure.travis-ci.org/ankurgel/bioruby-statsample-glm.png"
|
5
|
+
/>}[http://travis-ci.org/#!/ankurgel/bioruby-statsample-glm]
|
6
|
+
|
7
|
+
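Statsample-GLM is an extension to Statsample, a suite of advanced statistics in Ruby. It provides generalized linear models such as Poisson Regression and Logistic Regression.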
|
8
|
+
|
9
|
+
Note: this software is under active development!
|
10
|
+
|
11
|
+
== Installation
|
12
|
+
|
13
|
+
gem install bio-statsample-glm
|
14
|
+
|
15
|
+
== Usage
|
16
|
+
|
17
|
+
== Developers
|
18
|
+
|
19
|
+
To use the library
|
20
|
+
|
21
|
+
require 'bio-statsample-glm'
|
22
|
+
|
23
|
+
The API doc is online. For more code examples see also the test files in
|
24
|
+
the source tree.
|
25
|
+
|
26
|
+
== Project home page
|
27
|
+
|
28
|
+
For information on the source tree, documentation, issues and how to contribute, see
|
29
|
+
|
30
|
+
http://github.com/ankurgel/bioruby-statsample-glm
|
31
|
+
|
32
|
+
The BioRuby community is on IRC server: irc.freenode.org, channel: #bioruby.
|
33
|
+
|
34
|
+
== Cite
|
35
|
+
|
36
|
+
If you use this software, please cite one of
|
37
|
+
|
38
|
+
* {BioRuby: bioinformatics software for the Ruby programming language}[http://dx.doi.org/10.1093/bioinformatics/btq475]
|
39
|
+
* {Biogem: an effective tool-based approach for scaling up open source software development in bioinformatics}[http://dx.doi.org/10.1093/bioinformatics/bts080]
|
40
|
+
|
41
|
+
== Biogems.info
|
42
|
+
|
43
|
+
This Biogem is published at http://biogems.info/index.html#bio-statsample-glm
|
44
|
+
|
45
|
+
== Copyright
|
46
|
+
|
47
|
+
Copyright (c) 2013 Ankur Goel. See LICENSE.txt for further details.
|
48
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bundler'
|
5
|
+
begin
|
6
|
+
Bundler.setup(:default, :development)
|
7
|
+
rescue Bundler::BundlerError => e
|
8
|
+
$stderr.puts e.message
|
9
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
10
|
+
exit e.status_code
|
11
|
+
end
|
12
|
+
require 'rake'
|
13
|
+
|
14
|
+
require 'jeweler'
|
15
|
+
Jeweler::Tasks.new do |gem|
|
16
|
+
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
17
|
+
gem.name = "bio-statsample-glm"
|
18
|
+
gem.homepage = "http://github.com/AnkurGel/bioruby-statsample-glm"
|
19
|
+
gem.license = "MIT"
|
20
|
+
gem.summary = %Q{Generalized Linear Models for Statsample}
|
21
|
+
gem.description = %Q{Statsample-GLM is an extension to Statsample, an advance statistics suite in Ruby. This gem includes modules for Regression techniques such as Poisson Regression, Logistic Regression and Exponential Regression}
|
22
|
+
gem.email = "ankurgel@gmail.com"
|
23
|
+
gem.authors = ["Ankur Goel"]
|
24
|
+
# dependencies defined in Gemfile
|
25
|
+
end
|
26
|
+
Jeweler::RubygemsDotOrgTasks.new
|
27
|
+
|
28
|
+
require 'rake/testtask'
|
29
|
+
Rake::TestTask.new(:test) do |test|
|
30
|
+
test.libs << 'lib' << 'test'
|
31
|
+
test.pattern = 'test/**/test_*.rb'
|
32
|
+
test.verbose = true
|
33
|
+
end
|
34
|
+
|
35
|
+
require 'cucumber/rake/task'
|
36
|
+
Cucumber::Rake::Task.new(:features)
|
37
|
+
|
38
|
+
task :default => :test
|
39
|
+
|
40
|
+
require 'rdoc/task'
|
41
|
+
Rake::RDocTask.new do |rdoc|
|
42
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
43
|
+
|
44
|
+
rdoc.rdoc_dir = 'rdoc'
|
45
|
+
rdoc.title = "bio-statsample-glm #{version}"
|
46
|
+
rdoc.rdoc_files.include('README*')
|
47
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
48
|
+
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
File without changes
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'bundler'
|
2
|
+
begin
|
3
|
+
Bundler.setup(:default, :development)
|
4
|
+
rescue Bundler::BundlerError => e
|
5
|
+
$stderr.puts e.message
|
6
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
7
|
+
exit e.status_code
|
8
|
+
end
|
9
|
+
|
10
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__) + '/../../lib')
|
11
|
+
require 'bio-statsample-glm'
|
12
|
+
|
13
|
+
require 'test/unit/assertions'
|
14
|
+
|
15
|
+
World(Test::Unit::Assertions)
|
@@ -0,0 +1,12 @@
|
|
1
|
+
# Please require your code below, respecting the naming conventions in the
|
2
|
+
# bioruby directory tree.
|
3
|
+
#
|
4
|
+
# For example, say you have a plugin named bio-plugin, the only uncommented
|
5
|
+
# line in this file would be
|
6
|
+
#
|
7
|
+
# require 'bio/bio-plugin/plugin'
|
8
|
+
#
|
9
|
+
# In this file only require other files. Avoid other source code.
|
10
|
+
|
11
|
+
require 'statsample'
|
12
|
+
require 'bio-statsample-glm/regression'
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'bio-statsample-glm/regression/poisson'
|
2
|
+
require 'bio-statsample-glm/regression/logistic'
|
3
|
+
module Statsample
|
4
|
+
module Regression
|
5
|
+
include Statsample::VectorShorthands
|
6
|
+
|
7
|
+
# = Generalized linear models
|
8
|
+
# == Parameters
|
9
|
+
#
|
10
|
+
# * x = model matrix
|
11
|
+
# * y = response vector
|
12
|
+
# * method = symbol; choice of glm strategy, default = :poisson
|
13
|
+
#
|
14
|
+
# == Usage
|
15
|
+
# require 'bio-statsample-glm'
|
16
|
+
# x1=Statsample::Vector.new([0.537322309644812,-0.717124209978434,-0.519166718891331,0.434970973986765,-0.761822002215759,1.51170030921189,0.883854199811195,-0.908689798854196,1.70331977539793,-0.246971150634099,-1.59077593922623,-0.721548040910253,0.467025703920194,-0.510132788447137,0.430106510266798,-0.144353683251536,-1.54943800728303,0.849307651309298,-0.640304240933579,1.31462478279425,-0.399783455165345,0.0453055645017902,-2.58212161987746,-1.16484414309359,-1.08829266466281,-0.243893919684792,-1.96655661929441,0.301335373291024,-0.665832694463588,-0.0120650855753837,1.5116066367604,0.557300353673344,1.12829931872045,0.234443748015922,-2.03486690662651,0.275544751380246,-0.231465849558696,-0.356880153225012,-0.57746647541923,1.35758352580655,1.23971669378224,-0.662466275100489,0.313263561921793,-1.08783223256362,1.41964722846899,1.29325100940785,0.72153880625103,0.440580131022748,0.0351917814720056, -0.142353224879252],:scale)
|
17
|
+
# x2=Statsample::Vector.new([-0.866655707911859,-0.367820249977585,0.361486610435,0.857332626245179,0.133438466268095,0.716104533073575,1.77206093023382,-0.10136697295802,-0.777086491435508,-0.204573554913706,0.963353531412233,-1.10103024900542,-0.404372761837392,-0.230226345183469,0.0363730246866971,-0.838265540390497,1.12543549657924,-0.57929175648001,-0.747060244805248,0.58946979365152,-0.531952663697324,1.53338594419818,0.521992029051441,1.41631763288724,0.611402316795129,-0.518355638373296,-0.515192557101107,-0.672697937866108,1.84347042325327,-0.21195540664804,-0.269869371631611,0.296155694010096,-2.18097898069634,-1.21314663927206,1.49193669881581,1.38969280369493,-0.400680808117106,-1.87282814976479,1.82394870451051,0.637864732838274,-0.141155946382493,0.0699950644281617,1.32568550595165,-0.412599258349398,0.14436832227506,-1.16507785388489,-2.16782049922428,0.24318371493798,0.258954871320764,-0.151966534521183],:scale)
|
18
|
+
# y=Statsample::Vector.new([0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1],:scale)
|
19
|
+
# x=Statsample::Dataset.new({"i"=>intercept,"x1"=>x1,"x2"=>x2})
|
20
|
+
# obj = Statsample::Regression.glm(x.to_matrix, y, :binomial)
|
21
|
+
# #=> Logistic Regression object
|
22
|
+
#
|
23
|
+
# == Returns
|
24
|
+
# GLM object for given method.
|
25
|
+
# Builds the GLM object for the requested strategy.
#
# * x: model matrix, passed unchanged to the model constructor
# * y: response vector, passed unchanged to the model constructor
# * method: :poisson (default) or :binomial; a String or Symbol in any case
#
# Returns a Poisson or Logistic model; call #irwls on it to fit.
# Raises ArgumentError for any other method instead of silently returning nil.
def self.glm(x, y, method=:poisson)
  case method.to_s.downcase.to_sym
  when :poisson
    Statsample::Regression::GLM::Poisson.new(x, y)
  when :binomial
    Statsample::Regression::GLM::Logistic.new(x, y)
  else
    raise ArgumentError, "Unknown GLM method #{method.inspect}; expected :poisson or :binomial"
  end
end
|
34
|
+
|
35
|
+
|
36
|
+
# Fits the coefficients by repeating the update b_new = b - J(b)^-1 * h(b),
# starting from a zero vector.
#
# * x: model matrix (must respond to column_size)
# * y: response values
# * mu: callable (x, b) returning the fitted values as a matrix
# * w: callable (x, b); kept for interface compatibility, not called here
# * j: callable (x, b) returning the square matrix that is inverted each step
# * h: callable (x, b, y) returning the column that the inverse is multiplied by
# * epsilon: stop once the summed absolute change in b falls below this
# * max_iter: upper bound on the number of update steps
#
# Returns, in this order:
#   [coefficients, standard errors, fitted values, residuals,
#    iterations performed, residual degrees of freedom, converged flag]
# The fifth element is the number of steps actually taken; it equals
# max_iter only when the loop ran to its limit.
def self.irwls(x, y, mu, w, j, h, epsilon = 1e-7, max_iter = 100)
  b = Matrix.column_vector(Array.new(x.column_size, 0.0))
  converged = false
  iterations = 0
  1.upto(max_iter) do |step|
    iterations = step
    #conversion from : (solve(j(x,b)) %*% h(x,b,y))
    b_new = b - (j.call(x, b).inverse * h.call(x, b, y))
    converged = (b_new - b).map(&:abs).to_a.flatten.inject(:+) < epsilon
    b = b_new
    break if converged
  end

  # Square roots of the negated diagonal of the inverse of j at the final b.
  # Indexing is used so no Matrix extension is needed to read the diagonal.
  j_inverse = j.call(x, b).inverse
  ss = (0...x.column_size).map { |k| Math.sqrt(-j_inverse[k, k]) }
  values = mu.call(x, b)

  residuals = y - values.column_vectors.map(&:to_a).flatten
  df_residuals = y.count - x.column_size
  [create_vector(b.column_vectors[0]), create_vector(ss), create_vector(values.to_a.flatten),
   residuals, iterations, df_residuals, converged]
end
|
60
|
+
|
61
|
+
private
|
62
|
+
# Wraps +arr+ in a Statsample::Vector of type :scale.
def self.create_vector(arr)
  vector_type = :scale
  Statsample::Vector.new(arr, vector_type)
end
|
65
|
+
end
|
66
|
+
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
module Statsample
|
2
|
+
module Regression
|
3
|
+
module GLM
|
4
|
+
|
5
|
+
class Logistic
|
6
|
+
|
7
|
+
# a named vector of coefficients
|
8
|
+
attr_reader :coefficients
|
9
|
+
# Standard errors of the coefficients
|
10
|
+
attr_reader :se
|
11
|
+
# The fitted mean values
|
12
|
+
attr_reader :fit
|
13
|
+
# the _working_ residuals; that is the residuals in the final iteration of the IRWLS fit.
|
14
|
+
attr_reader :residuals
|
15
|
+
# The residuals degree of freedom
|
16
|
+
attr_reader :df
|
17
|
+
# Number of iterations used for convergence
|
18
|
+
attr_reader :iter
|
19
|
+
# Boolean. Tells whether the IRWLS for the given model converged or not
|
20
|
+
attr_reader :converged
|
21
|
+
|
22
|
+
# Stores the model matrix and the response for a later #irwls call.
def initialize(x, y)
  @x, @y = x, y
end
|
26
|
+
|
27
|
+
# Fitted means of the logistic model: the logistic function applied to
# every entry of x * b.
#
# * x: model matrix
# * b: coefficient column matrix
#
# Returns a new Matrix of Floats with the same shape as x * b.
#
# Written as 1 / (1 + exp(-eta)) so a large positive eta gives 1.0 rather
# than Infinity / Infinity = NaN, and without in-place element assignment.
def self.mu(x, b)
  (x * b).map { |eta| 1.0 / (1.0 + Math.exp(-eta)) }
end
|
36
|
+
|
37
|
+
# Weight matrix of the logistic model: a diagonal matrix whose i-th
# diagonal entry is mu_i * (1 - mu_i), with mu taken from mu(x, b).
#
# * x: model matrix
# * b: coefficient column matrix
#
# Built directly with Matrix.diagonal instead of scanning every cell of an
# identity matrix for ones.
def self.w(x, b)
  fitted = mu(x, b).column_vectors.map(&:to_a).flatten
  Matrix.diagonal(*fitted.map { |p| p * (1 - p) })
end
|
47
|
+
|
48
|
+
# Returns x' * (y - mu(x, b)) as a column matrix.
def self.h(x,b,y)
  fitted = mu(x, b).column_vectors.map(&:to_a).flatten
  errors = y.zip(fitted).map { |pair| pair.first - pair.last }
  x.transpose * Matrix.column_vector(errors)
end
|
54
|
+
|
55
|
+
# Returns the negated product x' * w(x, b) * x.
def self.j(x,b)
  weighted_cross_product = x.transpose * w(x, b) * x
  weighted_cross_product.collect { |entry| -entry }
end
|
60
|
+
|
61
|
+
# Human-readable model name. The class path uses "::" throughout
# (it previously contained a stray ";").
def to_s
  "Logistic Regression (Statsample::Regression::GLM::Logistic)"
end
|
64
|
+
|
65
|
+
# = Iteratively reweighted least squares
|
66
|
+
# Computes irwls for given model and parameters.
|
67
|
+
#
|
68
|
+
# == Usage
|
69
|
+
# require 'bio-statsample-glm'
|
70
|
+
# x1=Statsample::Vector.new([0.537322309644812,-0.717124209978434,-0.519166718891331,0.434970973986765,-0.761822002215759,1.51170030921189,0.883854199811195,-0.908689798854196,1.70331977539793,-0.246971150634099,-1.59077593922623,-0.721548040910253,0.467025703920194,-0.510132788447137,0.430106510266798,-0.144353683251536,-1.54943800728303,0.849307651309298,-0.640304240933579,1.31462478279425,-0.399783455165345,0.0453055645017902,-2.58212161987746,-1.16484414309359,-1.08829266466281,-0.243893919684792,-1.96655661929441,0.301335373291024,-0.665832694463588,-0.0120650855753837,1.5116066367604,0.557300353673344,1.12829931872045,0.234443748015922,-2.03486690662651,0.275544751380246,-0.231465849558696,-0.356880153225012,-0.57746647541923,1.35758352580655,1.23971669378224,-0.662466275100489,0.313263561921793,-1.08783223256362,1.41964722846899,1.29325100940785,0.72153880625103,0.440580131022748,0.0351917814720056, -0.142353224879252],:scale)
|
71
|
+
# x2=Statsample::Vector.new([-0.866655707911859,-0.367820249977585,0.361486610435,0.857332626245179,0.133438466268095,0.716104533073575,1.77206093023382,-0.10136697295802,-0.777086491435508,-0.204573554913706,0.963353531412233,-1.10103024900542,-0.404372761837392,-0.230226345183469,0.0363730246866971,-0.838265540390497,1.12543549657924,-0.57929175648001,-0.747060244805248,0.58946979365152,-0.531952663697324,1.53338594419818,0.521992029051441,1.41631763288724,0.611402316795129,-0.518355638373296,-0.515192557101107,-0.672697937866108,1.84347042325327,-0.21195540664804,-0.269869371631611,0.296155694010096,-2.18097898069634,-1.21314663927206,1.49193669881581,1.38969280369493,-0.400680808117106,-1.87282814976479,1.82394870451051,0.637864732838274,-0.141155946382493,0.0699950644281617,1.32568550595165,-0.412599258349398,0.14436832227506,-1.16507785388489,-2.16782049922428,0.24318371493798,0.258954871320764,-0.151966534521183],:scale)
|
72
|
+
# y=Statsample::Vector.new([0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1],:scale)
|
73
|
+
# x=Statsample::Dataset.new({"i"=>intercept,"x1"=>x1,"x2"=>x2})
|
74
|
+
# obj = Statsample::Regression.glm(x.to_matrix, y, :binomial)
|
75
|
+
# #=> Logistic Regression object
|
76
|
+
# obj.irwls
|
77
|
+
# #=> Array of returned values
|
78
|
+
# obj.coefficients
|
79
|
+
# #=> named vector of coefficients
|
80
|
+
|
81
|
+
# Runs Statsample::Regression.irwls for this model and stores the result
# in the reader attributes.
#
# The class-level mu, w, j and h are handed over as lambdas. The returned
# array lists the iteration figure before the residual degrees of freedom,
# so @iter is assigned before @df (they were previously swapped).
#
# Returns the array produced by Statsample::Regression.irwls.
def irwls
  model = self.class
  @coefficients, @se, @fit, @residuals, @iter, @df, @converged =
    Statsample::Regression.irwls(
      @x, @y,
      ->(l, m) { model.mu(l, m) }, ->(l, m) { model.w(l, m) },
      ->(l, m) { model.j(l, m) }, ->(k, l, m) { model.h(k, l, m) }
    )
end
|
90
|
+
end
|
91
|
+
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
module Statsample
|
2
|
+
module Regression
|
3
|
+
module GLM
|
4
|
+
|
5
|
+
class Poisson
|
6
|
+
|
7
|
+
# a named vector of coefficients
|
8
|
+
attr_reader :coefficients
|
9
|
+
# Standard errors of the coefficients
|
10
|
+
attr_reader :se
|
11
|
+
# The fitted mean values
|
12
|
+
attr_reader :fit
|
13
|
+
# the _working_ residuals; that is the residuals in the final iteration of the IRWLS fit.
|
14
|
+
attr_reader :residuals
|
15
|
+
# The residuals degree of freedom
|
16
|
+
attr_reader :df
|
17
|
+
# Number of iterations used for convergence
|
18
|
+
attr_reader :iter
|
19
|
+
# Boolean. Tells whether the IRWLS for the given model converged or not
|
20
|
+
attr_reader :converged
|
21
|
+
|
22
|
+
# Stores the model matrix and the response for a later #irwls call.
def initialize(x, y)
  @x, @y = x, y
end
|
26
|
+
|
27
|
+
# Fitted means of the Poisson model for the given link.
#
# * x: model matrix
# * b: coefficient column matrix
# * link: :log (default) applies exp to each entry of x * b;
#   :sqrt squares each entry. A String or Symbol in any case is accepted.
#
# Returns a Matrix with the same shape as x * b.
# Raises ArgumentError for any other link instead of silently returning nil.
def self.mu(x, b, link=:log)
  case link.to_s.downcase.to_sym
  when :log
    (x * b).map { |eta| Math.exp(eta) }
  when :sqrt
    (x * b).map { |eta| eta**2 }
  else
    raise ArgumentError, "Unknown link #{link.inspect}; expected :log or :sqrt"
  end
end
|
34
|
+
|
35
|
+
# Weight matrix of the Poisson model: a diagonal matrix whose diagonal
# holds the values of mu(x, b) in order.
#
# * x: model matrix
# * b: coefficient column matrix
#
# Built directly with Matrix.diagonal instead of scanning every cell of an
# identity matrix for ones.
def self.w(x, b)
  mu_flat = mu(x, b).column_vectors.map(&:to_a).flatten
  Matrix.diagonal(*mu_flat)
end
|
45
|
+
|
46
|
+
# Returns x' * (y - mu(x, b)) as a column matrix.
def self.h(x, b, y)
  fitted = mu(x, b).column_vectors.map(&:to_a).flatten
  errors = y.zip(fitted).map { |pair| pair.first - pair.last }
  x.transpose * Matrix.column_vector(errors)
end
|
52
|
+
|
53
|
+
# Returns the negated product x' * w(x, b) * x.
def self.j(x, b)
  weighted_cross_product = x.transpose * w(x, b) * x
  weighted_cross_product.collect { |entry| -entry }
end
|
58
|
+
|
59
|
+
# Human-readable model name. This is the Poisson class; it previously
# returned the Logistic label.
def to_s
  "Poisson Regression (Statsample::Regression::GLM::Poisson)"
end
|
62
|
+
|
63
|
+
# Runs Statsample::Regression.irwls for this model and stores the result
# in the reader attributes.
#
# The class-level mu, w, j and h are handed over as lambdas. The returned
# array lists the iteration figure before the residual degrees of freedom,
# so @iter is assigned before @df (they were previously swapped).
#
# Returns the array produced by Statsample::Regression.irwls.
def irwls
  model = self.class
  @coefficients, @se, @fit, @residuals, @iter, @df, @converged =
    Statsample::Regression.irwls(
      @x, @y,
      ->(l, m) { model.mu(l, m) }, ->(l, m) { model.w(l, m) },
      ->(l, m) { model.j(l, m) }, ->(k, l, m) { model.h(k, l, m) }
    )
end
|
72
|
+
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
data/test/helper.rb
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler'
|
3
|
+
begin
|
4
|
+
Bundler.setup(:default, :development)
|
5
|
+
rescue Bundler::BundlerError => e
|
6
|
+
$stderr.puts e.message
|
7
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
8
|
+
exit e.status_code
|
9
|
+
end
|
10
|
+
require 'minitest/unit'
|
11
|
+
require 'shoulda'
|
12
|
+
require 'shoulda-context'
|
13
|
+
require 'mocha/setup'
|
14
|
+
|
15
|
+
|
16
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
17
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
18
|
+
require 'bio-statsample-glm'
|
19
|
+
module MiniTest
|
20
|
+
class Unit
|
21
|
+
class TestCase
|
22
|
+
include Shoulda::Context::Assertions
|
23
|
+
include Shoulda::Context::InstanceMethods
|
24
|
+
extend Shoulda::Context::ClassMethods
|
25
|
+
# Declares a `should` test that evaluates +block+ when Statsample reports
# GSL as available, and is skipped with "Requires GSL" otherwise.
def self.should_with_gsl(name,&block)
  should(name) do
    Statsample.has_gsl? ? instance_eval(&block) : skip("Requires GSL")
  end
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
module Assertions
|
38
|
+
# Asserts that +exp+ and +obs+ have the same size and that every element
# of exp.data_with_nils is within +delta+ of the element of +obs+ at the
# same index.
#
# +msg+ defaults to a description of both vectors and is passed to every
# assertion; it was previously built but never used.
def assert_similar_vector(exp, obs, delta=1e-10,msg=nil)
  msg||="Different vectors #{exp} - #{obs}"
  assert_equal(exp.size, obs.size, msg)
  exp.data_with_nils.each_with_index {|v,i|
    assert_in_delta(v,obs[i],delta,msg)
  }
end
|
45
|
+
# Asserts that +exp+ and +obs+ have the same size and agree index by
# index to within +delta+; +msg+ is appended to each failure message.
def assert_equal_vector(exp,obs,delta=1e-10,msg=nil)
  assert_equal(exp.size, obs.size, "Different size.#{msg}")
  exp.size.times do |idx|
    assert_in_delta(
      exp[idx], obs[idx], delta,
      "Different element #{idx}. \nExpected:\n#{exp}\nObserved:\n#{obs}.#{msg}"
    )
  end
end
|
51
|
+
# Asserts that +exp+ and +obs+ have the same row and column counts and
# agree cell by cell to within +delta+; +msg+ is appended to each
# failure message.
def assert_equal_matrix(exp,obs,delta=1e-10,msg=nil)
  assert_equal(exp.row_size, obs.row_size, "Different row size.#{msg}")
  assert_equal(exp.column_size, obs.column_size, "Different column size.#{msg}")
  exp.row_size.times do |row|
    exp.column_size.times do |col|
      assert_in_delta(
        exp[row,col], obs[row,col], delta,
        "Different element #{row},#{col}\nExpected:\n#{exp}\nObserved:\n#{obs}.#{msg}"
      )
    end
  end
end
|
60
|
+
alias :assert_raise :assert_raises unless method_defined? :assert_raise
|
61
|
+
alias :assert_not_equal :refute_equal unless method_defined? :assert_not_equal
|
62
|
+
alias :assert_not_same :refute_same unless method_defined? :assert_not_same
|
63
|
+
unless method_defined? :assert_nothing_raised
|
64
|
+
# Asserts that the given block raises no StandardError.
#
# +msg+ is a format string that receives the raised error; the default is
# "Nothing should be raised, but raised %s".
#
# Only StandardError is rescued. Exceptions outside that hierarchy, such
# as Interrupt or SystemExit, pass through to the caller instead of being
# turned into an assertion failure.
def assert_nothing_raised(msg=nil)
  msg||="Nothing should be raised, but raised %s"
  not_raised = true
  begin
    yield
  rescue StandardError => e
    not_raised = false
    msg = sprintf(msg, e)
  end
  assert(not_raised, msg)
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
MiniTest::Unit.autorun
|
data/test/test_glm.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
require(File.expand_path(File.dirname(__FILE__)+'/helper.rb'))
|
2
|
+
|
3
|
+
class StatsampleRegressionGlm < MiniTest::Unit::TestCase
|
4
|
+
|
5
|
+
context("Example") do
|
6
|
+
setup do
|
7
|
+
x1=Statsample::Vector.new([0.537322309644812,-0.717124209978434,-0.519166718891331,0.434970973986765,-0.761822002215759,1.51170030921189,0.883854199811195,-0.908689798854196,1.70331977539793,-0.246971150634099,-1.59077593922623,-0.721548040910253,0.467025703920194,-0.510132788447137,0.430106510266798,-0.144353683251536,-1.54943800728303,0.849307651309298,-0.640304240933579,1.31462478279425,-0.399783455165345,0.0453055645017902,-2.58212161987746,-1.16484414309359,-1.08829266466281,-0.243893919684792,-1.96655661929441,0.301335373291024,-0.665832694463588,-0.0120650855753837,1.5116066367604,0.557300353673344,1.12829931872045,0.234443748015922,-2.03486690662651,0.275544751380246,-0.231465849558696,-0.356880153225012,-0.57746647541923,1.35758352580655,1.23971669378224,-0.662466275100489,0.313263561921793,-1.08783223256362,1.41964722846899,1.29325100940785,0.72153880625103,0.440580131022748,0.0351917814720056, -0.142353224879252],:scale)
|
8
|
+
x2=Statsample::Vector.new([-0.866655707911859,-0.367820249977585,0.361486610435,0.857332626245179,0.133438466268095,0.716104533073575,1.77206093023382,-0.10136697295802,-0.777086491435508,-0.204573554913706,0.963353531412233,-1.10103024900542,-0.404372761837392,-0.230226345183469,0.0363730246866971,-0.838265540390497,1.12543549657924,-0.57929175648001,-0.747060244805248,0.58946979365152,-0.531952663697324,1.53338594419818,0.521992029051441,1.41631763288724,0.611402316795129,-0.518355638373296,-0.515192557101107,-0.672697937866108,1.84347042325327,-0.21195540664804,-0.269869371631611,0.296155694010096,-2.18097898069634,-1.21314663927206,1.49193669881581,1.38969280369493,-0.400680808117106,-1.87282814976479,1.82394870451051,0.637864732838274,-0.141155946382493,0.0699950644281617,1.32568550595165,-0.412599258349398,0.14436832227506,-1.16507785388489,-2.16782049922428,0.24318371493798,0.258954871320764,-0.151966534521183],:scale)
|
9
|
+
@y_log=Statsample::Vector.new([0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1],:scale)
|
10
|
+
@y_pois=Statsample::Vector.new([1,2,1,3,3,1,10,1,1,2,15,0,0,2,1,2,18,2,1,1,1,8,18,13,7,1,1,0,26,0,2,2,0,0,25,7,0,0,21,0,0,1,5,0,3,0,0,1,0,0],:scale)
|
11
|
+
intercept=Statsample::Vector.new([1]*50,:scale)
|
12
|
+
@df=Statsample::Dataset.new({"i"=>intercept,"x1"=>x1,"x2"=>x2})
|
13
|
+
end
|
14
|
+
|
15
|
+
context("Logistic") do
|
16
|
+
setup do
|
17
|
+
@glm=Statsample::Regression.glm(@df.to_matrix,@y_log,:binomial)
|
18
|
+
@glm.irwls
|
19
|
+
end
|
20
|
+
|
21
|
+
should "report correct coefficientes" do
|
22
|
+
assert_similar_vector(@glm.coefficients,[0.675603176233325,-0.312493754568903,2.28671333346264])
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
context("Poisson") do
|
27
|
+
setup do
|
28
|
+
@glm=Statsample::Regression.glm(@df.to_matrix,@y_pois,:poisson)
|
29
|
+
@glm.irwls
|
30
|
+
end
|
31
|
+
|
32
|
+
should "report correct coefficientes" do
|
33
|
+
assert_similar_vector(@glm.coefficients,[0.32993246633711,-0.586359358356708,1.28511323439258])
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
metadata
ADDED
@@ -0,0 +1,246 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: bio-statsample-glm
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Ankur Goel
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-09-23 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: statsample
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 1.2.0
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 1.2.0
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: activesupport
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - '='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: 3.2.10
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - '='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: 3.2.10
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: shoulda
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: rdoc
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ~>
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '3.12'
|
70
|
+
type: :development
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ~>
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '3.12'
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: minitest
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ~>
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: 4.7.5
|
86
|
+
type: :development
|
87
|
+
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ~>
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: 4.7.5
|
94
|
+
- !ruby/object:Gem::Dependency
|
95
|
+
name: cucumber
|
96
|
+
requirement: !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - ! '>='
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
type: :development
|
103
|
+
prerelease: false
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ! '>='
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0'
|
110
|
+
- !ruby/object:Gem::Dependency
|
111
|
+
name: bundler
|
112
|
+
requirement: !ruby/object:Gem::Requirement
|
113
|
+
none: false
|
114
|
+
requirements:
|
115
|
+
- - ~>
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: 1.3.5
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
none: false
|
122
|
+
requirements:
|
123
|
+
- - ~>
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: 1.3.5
|
126
|
+
- !ruby/object:Gem::Dependency
|
127
|
+
name: jeweler
|
128
|
+
requirement: !ruby/object:Gem::Requirement
|
129
|
+
none: false
|
130
|
+
requirements:
|
131
|
+
- - ~>
|
132
|
+
- !ruby/object:Gem::Version
|
133
|
+
version: 1.8.4
|
134
|
+
type: :development
|
135
|
+
prerelease: false
|
136
|
+
version_requirements: !ruby/object:Gem::Requirement
|
137
|
+
none: false
|
138
|
+
requirements:
|
139
|
+
- - ~>
|
140
|
+
- !ruby/object:Gem::Version
|
141
|
+
version: 1.8.4
|
142
|
+
- !ruby/object:Gem::Dependency
|
143
|
+
name: bio
|
144
|
+
requirement: !ruby/object:Gem::Requirement
|
145
|
+
none: false
|
146
|
+
requirements:
|
147
|
+
- - ! '>='
|
148
|
+
- !ruby/object:Gem::Version
|
149
|
+
version: 1.4.2
|
150
|
+
type: :development
|
151
|
+
prerelease: false
|
152
|
+
version_requirements: !ruby/object:Gem::Requirement
|
153
|
+
none: false
|
154
|
+
requirements:
|
155
|
+
- - ! '>='
|
156
|
+
- !ruby/object:Gem::Version
|
157
|
+
version: 1.4.2
|
158
|
+
- !ruby/object:Gem::Dependency
|
159
|
+
name: rdoc
|
160
|
+
requirement: !ruby/object:Gem::Requirement
|
161
|
+
none: false
|
162
|
+
requirements:
|
163
|
+
- - ~>
|
164
|
+
- !ruby/object:Gem::Version
|
165
|
+
version: '3.12'
|
166
|
+
type: :development
|
167
|
+
prerelease: false
|
168
|
+
version_requirements: !ruby/object:Gem::Requirement
|
169
|
+
none: false
|
170
|
+
requirements:
|
171
|
+
- - ~>
|
172
|
+
- !ruby/object:Gem::Version
|
173
|
+
version: '3.12'
|
174
|
+
- !ruby/object:Gem::Dependency
|
175
|
+
name: mocha
|
176
|
+
requirement: !ruby/object:Gem::Requirement
|
177
|
+
none: false
|
178
|
+
requirements:
|
179
|
+
- - ~>
|
180
|
+
- !ruby/object:Gem::Version
|
181
|
+
version: 0.14.0
|
182
|
+
type: :development
|
183
|
+
prerelease: false
|
184
|
+
version_requirements: !ruby/object:Gem::Requirement
|
185
|
+
none: false
|
186
|
+
requirements:
|
187
|
+
- - ~>
|
188
|
+
- !ruby/object:Gem::Version
|
189
|
+
version: 0.14.0
|
190
|
+
description: Statsample-GLM is an extension to Statsample, an advance statistics suite
|
191
|
+
in Ruby. This gem includes modules for Regression techniques such as Poisson Regression,
|
192
|
+
Logistic Regression and Exponential Regression
|
193
|
+
email: ankurgel@gmail.com
|
194
|
+
executables: []
|
195
|
+
extensions: []
|
196
|
+
extra_rdoc_files:
|
197
|
+
- LICENSE.txt
|
198
|
+
- README.md
|
199
|
+
- README.rdoc
|
200
|
+
files:
|
201
|
+
- .document
|
202
|
+
- .travis.yml
|
203
|
+
- Gemfile
|
204
|
+
- LICENSE.txt
|
205
|
+
- README.md
|
206
|
+
- README.rdoc
|
207
|
+
- Rakefile
|
208
|
+
- VERSION
|
209
|
+
- features/bio-statsample-glm.feature
|
210
|
+
- features/step_definitions/bio-statsample-glm_steps.rb
|
211
|
+
- features/support/env.rb
|
212
|
+
- lib/bio-statsample-glm.rb
|
213
|
+
- lib/bio-statsample-glm/regression.rb
|
214
|
+
- lib/bio-statsample-glm/regression/logistic.rb
|
215
|
+
- lib/bio-statsample-glm/regression/poisson.rb
|
216
|
+
- test/helper.rb
|
217
|
+
- test/test_glm.rb
|
218
|
+
homepage: http://github.com/AnkurGel/bioruby-statsample-glm
|
219
|
+
licenses:
|
220
|
+
- MIT
|
221
|
+
post_install_message:
|
222
|
+
rdoc_options: []
|
223
|
+
require_paths:
|
224
|
+
- lib
|
225
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
226
|
+
none: false
|
227
|
+
requirements:
|
228
|
+
- - ! '>='
|
229
|
+
- !ruby/object:Gem::Version
|
230
|
+
version: '0'
|
231
|
+
segments:
|
232
|
+
- 0
|
233
|
+
hash: 797002845
|
234
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
235
|
+
none: false
|
236
|
+
requirements:
|
237
|
+
- - ! '>='
|
238
|
+
- !ruby/object:Gem::Version
|
239
|
+
version: '0'
|
240
|
+
requirements: []
|
241
|
+
rubyforge_project:
|
242
|
+
rubygems_version: 1.8.25
|
243
|
+
signing_key:
|
244
|
+
specification_version: 3
|
245
|
+
summary: Generalized Linear Models for Statsample
|
246
|
+
test_files: []
|