statsample-glm 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
@@ -0,0 +1,13 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.2
4
+ - 1.9.3
5
+ - 2.0.0
6
+ - jruby-19mode # JRuby in 1.9 mode
7
+ - rbx-19mode
8
+ # - 1.8.7
9
+ # - jruby-18mode # JRuby in 1.8 mode
10
+ # - rbx-18mode
11
+
12
+ # uncomment this line if your project needs to run something other than `rake`:
13
+ # script: bundle exec rspec spec
data/Gemfile ADDED
@@ -0,0 +1,20 @@
1
+ source "http://rubygems.org"
2
+
3
+ gem 'statsample', '>=1.2.0'
4
+ # Add dependencies required to use your gem here.
5
+ # Example:
6
+ gem "activesupport", "= 3.2.10"
7
+
8
+ # Add dependencies to develop your gem here.
9
+ # Include everything needed to run rake, tests, features, etc.
10
# Development-only dependencies: everything needed to run rake, tests and features.
# Each gem is listed once; a repeated declaration makes Bundler warn about duplicates.
group :development do
  gem "shoulda", ">= 0"
  gem "rdoc", "~> 3.12"
  gem "minitest", "~> 4.7.5"
  gem "cucumber", ">= 0"
  gem "bundler", "~> 1.3.5"
  gem "jeweler", "~> 1.8.4"
  gem "bio", ">= 1.4.2"
  gem "mocha", "~> 0.14.0"
end
@@ -0,0 +1,22 @@
1
+ This version of Statsample-GLM is licensed under the BSD 2-clause license.
2
+
3
+ * http://sciruby.com
4
+ * http://github.com/sciruby/sciruby/wiki/License
5
+
6
+ You *must* read the Contributor Agreement before contributing code to the SciRuby Project. This is available online:
7
+
8
+ * http://github.com/sciruby/sciruby/wiki/Contributor-Agreement
9
+
10
+ -----
11
+
12
+ Copyright (c) 2010 - 2013, Ruby Science Foundation
13
+ All rights reserved.
14
+
15
+ Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
16
+
17
+ * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
18
+
19
+ * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
20
+
21
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22
+
@@ -0,0 +1,71 @@
1
+ = statsample-glm
2
+
3
+ {<img
4
+ src="https://secure.travis-ci.org/AnkurGel/statsample-glm.png"
5
+ />}[http://travis-ci.org/#!/AnkurGel/statsample-glm]
6
+
7
+ Statsample-GLM is an extension that adds *Generalized Linear Models* to {Statsample}[https://github.com/SciRuby/statsample], a suite for advanced statistics in Ruby.
8
+
9
+ * {sciruby.com}[http://sciruby.com]
10
+ * {Google+}[https://plus.google.com/109304769076178160953/posts]
11
+ * {Ankur Goel}[http://ankurgoel.com]
12
+ * {Statsample}[https://github.com/SciRuby/statsample]
13
+
14
+
15
+ == Description
16
+ Statsample-GLM is an extension of Statsample, and includes many helpful regression techniques for Generalized Linear models such as:
17
+
18
+ * Poisson Regression
19
+ * Logistic Regression
20
+ * Exponential Regression
21
+ * Iteratively Reweighted Least Squares
22
+
23
+ Statsample-GLM was created by Ankur Goel as part of Google's Summer of Code 2013. It is part of {SciRuby}[http://sciruby.com].
24
+
25
+ Note: This is under active development!
26
+
27
+ == Installation
28
+
29
+ gem install statsample-glm
30
+
31
+
32
+ == Usage
33
+
34
+ To use the library
35
+
36
+ require 'statsample-glm'
37
+
38
+ You can also go through the blog-posts on {my blog}[http://ankurgoel.com] for descriptive explanation and examples.
39
+
40
+
41
+ == Documentation
42
+
43
+ The API doc is {online}[http://rubygems.org/gems/statsample-glm]. For more code examples see also the test files in the source tree.
44
+
45
+
46
+ == Contributing
47
+
48
+ * Fork the project.
49
+ * Create your feature branch
50
+ * Add/Modify code.
51
+ * Write equivalent documentation and **tests**.
52
+ * Run `rake test` to verify that all test cases pass.
53
+ * Push your branch.
54
+ * Pull request. :)
55
+
56
+
57
+ == Project home page
58
+
59
+ For information on the source tree, documentation, issues and how to contribute, see
60
+
61
+ http://github.com/ankurgel/statsample-glm
62
+
63
+
64
+ == Biogems.info
65
+
66
+ This Biogem is published at http://biogems.info/index.html#statsample-glm
67
+
68
+ == Copyright
69
+
70
+ Copyright (c) 2013 Ankur Goel. See LICENSE.txt for further details.
71
+
@@ -0,0 +1,48 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'bundler'
5
+ begin
6
+ Bundler.setup(:default, :development)
7
+ rescue Bundler::BundlerError => e
8
+ $stderr.puts e.message
9
+ $stderr.puts "Run `bundle install` to install missing gems"
10
+ exit e.status_code
11
+ end
12
+ require 'rake'
13
+
14
+ require 'jeweler'
15
# Packaging metadata handed to Jeweler, which generates the gemspec from it.
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "statsample-glm"
  gem.homepage = "http://github.com/AnkurGel/statsample-glm"
  # LICENSE.txt says this gem is under the BSD 2-clause license, so the
  # advertised license identifier has to match it.
  gem.license = "BSD-2-Clause"
  gem.summary = %Q{Generalized Linear Models for Statsample}
  gem.description = %Q{Statsample-GLM is an extension to Statsample, an advance statistics suite in Ruby. This gem includes modules for Regression techniques such as Poisson Regression, Logistic Regression and Exponential Regression}
  gem.email = "ankurgel@gmail.com"
  gem.authors = ["Ankur Goel"]
  # dependencies defined in Gemfile
end
26
+ Jeweler::RubygemsDotOrgTasks.new
27
+
28
+ require 'rake/testtask'
29
+ Rake::TestTask.new(:test) do |test|
30
+ test.libs << 'lib' << 'test'
31
+ test.pattern = 'test/**/test_*.rb'
32
+ test.verbose = true
33
+ end
34
+
35
+ require 'cucumber/rake/task'
36
+ Cucumber::Rake::Task.new(:features)
37
+
38
+ task :default => :test
39
+
40
+ require 'rdoc/task'
41
+ Rake::RDocTask.new do |rdoc|
42
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
43
+
44
+ rdoc.rdoc_dir = 'rdoc'
45
+ rdoc.title = "statsample-glm #{version}"
46
+ rdoc.rdoc_files.include('README*')
47
+ rdoc.rdoc_files.include('lib/**/*.rb')
48
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.1
@@ -0,0 +1,9 @@
1
+ Feature: something something
2
+ In order to something something
3
+ A user something something
4
+ something something something
5
+
6
+ Scenario: something something
7
+ Given inspiration
8
+ When I create a sweet new gem
9
+ Then everyone should see how awesome I am
@@ -0,0 +1,15 @@
1
+ require 'bundler'
2
+ begin
3
+ Bundler.setup(:default, :development)
4
+ rescue Bundler::BundlerError => e
5
+ $stderr.puts e.message
6
+ $stderr.puts "Run `bundle install` to install missing gems"
7
+ exit e.status_code
8
+ end
9
+
10
+ $LOAD_PATH.unshift(File.dirname(__FILE__) + '/../../lib')
11
+ require 'statsample-glm'
12
+
13
+ require 'test/unit/assertions'
14
+
15
+ World(Test::Unit::Assertions)
@@ -0,0 +1,12 @@
1
+ # Please require your code below, respecting the naming conventions in the
2
+ # bioruby directory tree.
3
+ #
4
+ # For example, say you have a plugin named bio-plugin, the only uncommented
5
+ # line in this file would be
6
+ #
7
+ # require 'bio/bio-plugin/plugin'
8
+ #
9
+ # In this file only require other files. Avoid other source code.
10
+
11
+ require 'statsample'
12
+ require 'statsample-glm/regression'
@@ -0,0 +1,69 @@
1
+ require 'statsample-glm/regression/poisson'
2
+ require 'statsample-glm/regression/logistic'
3
+ module Statsample
4
+ module Regression
5
+ include Statsample::VectorShorthands
6
+
7
+ # = Generalized linear models
8
+ # == Parameters
9
+ #
10
+ # * x = model matrix
11
+ # * y = response vector
12
+     # * method = symbol; choice of glm strategy (:poisson or :binomial). The default, :gaussian, is not implemented yet and raises.
13
+ #
14
+ # == Usage
15
+ # require 'statsample-glm'
16
+ # x1=Statsample::Vector.new([0.537322309644812,-0.717124209978434,-0.519166718891331,0.434970973986765,-0.761822002215759,1.51170030921189,0.883854199811195,-0.908689798854196,1.70331977539793,-0.246971150634099,-1.59077593922623,-0.721548040910253,0.467025703920194,-0.510132788447137,0.430106510266798,-0.144353683251536,-1.54943800728303,0.849307651309298,-0.640304240933579,1.31462478279425,-0.399783455165345,0.0453055645017902,-2.58212161987746,-1.16484414309359,-1.08829266466281,-0.243893919684792,-1.96655661929441,0.301335373291024,-0.665832694463588,-0.0120650855753837,1.5116066367604,0.557300353673344,1.12829931872045,0.234443748015922,-2.03486690662651,0.275544751380246,-0.231465849558696,-0.356880153225012,-0.57746647541923,1.35758352580655,1.23971669378224,-0.662466275100489,0.313263561921793,-1.08783223256362,1.41964722846899,1.29325100940785,0.72153880625103,0.440580131022748,0.0351917814720056, -0.142353224879252],:scale)
17
+ # x2=Statsample::Vector.new([-0.866655707911859,-0.367820249977585,0.361486610435,0.857332626245179,0.133438466268095,0.716104533073575,1.77206093023382,-0.10136697295802,-0.777086491435508,-0.204573554913706,0.963353531412233,-1.10103024900542,-0.404372761837392,-0.230226345183469,0.0363730246866971,-0.838265540390497,1.12543549657924,-0.57929175648001,-0.747060244805248,0.58946979365152,-0.531952663697324,1.53338594419818,0.521992029051441,1.41631763288724,0.611402316795129,-0.518355638373296,-0.515192557101107,-0.672697937866108,1.84347042325327,-0.21195540664804,-0.269869371631611,0.296155694010096,-2.18097898069634,-1.21314663927206,1.49193669881581,1.38969280369493,-0.400680808117106,-1.87282814976479,1.82394870451051,0.637864732838274,-0.141155946382493,0.0699950644281617,1.32568550595165,-0.412599258349398,0.14436832227506,-1.16507785388489,-2.16782049922428,0.24318371493798,0.258954871320764,-0.151966534521183],:scale)
18
+ # y=Statsample::Vector.new([0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1],:scale)
19
+ # x=Statsample::Dataset.new({"i"=>intercept,"x1"=>x1,"x2"=>x2})
20
+ # obj = Statsample::Regression.glm(x, y, :binomial)
21
+ # #=> Logistic Regression object
22
+ #
23
+ # == Returns
24
+ # GLM object for given method.
25
# Builds the GLM object for the requested family and fits it.
#
# x and y are handed unchanged to the model constructor. method is
# lower-cased and converted to a symbol: :poisson selects GLM::Poisson,
# :binomial selects GLM::Logistic. Anything else (including the default
# :gaussian) raises a RuntimeError with "Not implemented yet".
#
# Returns the model object after its irwls method has been called.
def self.glm(x, y, method=:gaussian)
  family = method.downcase.to_sym
  model_class =
    case family
    when :poisson  then Statsample::Regression::GLM::Poisson
    when :binomial then Statsample::Regression::GLM::Logistic
    else raise("Not implemented yet")
    end
  model = model_class.new(x,y)
  model.irwls
  model
end
37
+
38
+
39
# Iteratively fits the coefficient vector b, starting from all zeros, with the
# update b <- b - j(x, b).inverse * h(x, b, y).
#
# == Parameters
# * x        = model matrix (its column_size sets the number of coefficients)
# * y        = response; must support `y - Array` and `count`
# * mu       = callable (x, b) giving the fitted values as a matrix
# * w        = accepted for signature compatibility; not used by this method
# * j        = callable (x, b) giving the matrix that is inverted at each step
# * h        = callable (x, b, y) giving the column matrix multiplied by that inverse
# * epsilon  = stop once the summed absolute change in b falls below this
# * max_iter = upper bound on the number of update steps
#
# == Returns
# Array of [coefficients, standard errors, fitted values, residuals,
# iterations performed, residual degrees of freedom, converged flag].
# The fifth element is the number of steps actually taken, not max_iter.
def self.irwls(x, y, mu, w, j, h, epsilon = 1e-7, max_iter = 100)
  b = Matrix.column_vector(Array.new(x.column_size,0.0))
  converged = false
  iterations = 0
  1.upto(max_iter) do |i|
    iterations = i
    # conversion from : (solve(j(x,b)) %*% h(x,b,y))
    step = j.call(x,b).inverse * h.call(x,b,y)
    b_new = b - step

    converged = (b_new - b).map(&:abs).to_a.flatten.inject(:+) < epsilon
    b = b_new
    break if converged
  end
  # Standard errors: square roots of the negated diagonal of the inverse of j.
  ss = j.call(x,b).inverse.diagonal.map { |entry| -entry }.map { |entry| Math.sqrt(entry) }
  values = mu.call(x,b)

  residuals = y - values.column_vectors.map(&:to_a).flatten
  df_residuals = y.count - x.column_size
  [create_vector(b.column_vectors[0]), create_vector(ss), create_vector(values.to_a.flatten),
   residuals, iterations, df_residuals, converged]
end
63
+
64
+ private
65
+ def self.create_vector(arr)
66
+ Statsample::Vector.new(arr, :scale)
67
+ end
68
+ end
69
+ end
@@ -0,0 +1,108 @@
1
+ module Statsample
2
+ module Regression
3
+ module GLM
4
+
5
+ class Logistic
6
+
7
+ attr_reader :se
8
+ # The fitted mean values
9
+ attr_reader :fit
10
+ # the _working_ residuals; that is the residuals in the final iteration of the IRWLS fit.
11
+ attr_reader :residuals
12
+ # The residuals degree of freedom
13
+ attr_reader :df
14
+ # Number of iterations used for convergence
15
+ attr_reader :iter
16
+ # Boolean. Tells whether the IRWLS for the given model converged or not
17
+ attr_reader :converged
18
+
19
# Keeps the dataset and response, and caches the dataset's field list and
# its matrix form (ds must respond to fields and to_matrix).
def initialize(ds, y)
  @ds, @y = ds, y
  @fields = ds.fields
  @x = ds.to_matrix
end
25
+
26
+ # named vector/hash of coefficients
27
+ # === Parameter
28
+ # * *type*: symbol; (:array, default). Options = [:array, :hash]
29
# Returns the stored coefficients.
# * type :array (default) -> the coefficients object exactly as stored
# * type :hash            -> Hash mapping each field name to its coefficient
# Any other type returns nil.
def coefficients(type=:array)
  case type
  when :array
    @coefficients
  when :hash
    named = {}
    @fields.each_with_index { |field, idx| named[field] = @coefficients[idx] }
    named
  end
end
41
# Mean function of the logistic model: the inverse logit applied to every
# entry of x * b. Returns a new matrix with the same shape as x * b.
#
# Written as 1 / (1 + exp(-eta)) so that a large positive eta yields 1.0;
# the exp(eta) / (1 + exp(eta)) form becomes Infinity / Infinity (NaN) once
# exp overflows. It also avoids assigning into the matrix in place.
def self.mu(x, b)
  (x * b).map { |eta| 1.0 / (1.0 + Math.exp(-eta)) }
end
50
+
51
# Weight matrix for the logistic model: a diagonal matrix whose i-th diagonal
# entry is mu_i * (1 - mu_i), with mu taken from mu(x, b).
def self.w(x, b)
  weights = mu(x,b).column_vectors.map(&:to_a).flatten.map { |m| m * (1 - m) }
  # Matrix.diagonal builds the matrix directly instead of walking every cell
  # of an identity matrix and substituting the ones.
  Matrix.diagonal(*weights)
end
61
+
62
# Returns x' * (y - mu(x, b)) as a single-column matrix.
def self.h(x,b,y)
  fitted = mu(x,b).column_vectors.map(&:to_a).flatten
  differences = y.zip(fitted).map { |observed, expected| observed - expected }
  x.transpose * Matrix.column_vector(differences)
end
68
+
69
# Returns the element-wise negation of x' * w(x, b) * x.
def self.j(x,b)
  product = x.transpose * w(x, b) * x
  product.map(&:-@)
end
74
+
75
# Short human-readable label naming this model and its class.
def to_s
  "Logistic Regression (Statsample::Regression::GLM::Logistic)"
end
78
+
79
+ # = Iteratively reweighted least squares
80
+ # Computes irwls for given model and parameters.
81
+ #
82
+ # == Usage
83
+ # require 'statsample-glm'
84
+ # x1=Statsample::Vector.new([0.537322309644812,-0.717124209978434,-0.519166718891331,0.434970973986765,-0.761822002215759,1.51170030921189,0.883854199811195,-0.908689798854196,1.70331977539793,-0.246971150634099,-1.59077593922623,-0.721548040910253,0.467025703920194,-0.510132788447137,0.430106510266798,-0.144353683251536,-1.54943800728303,0.849307651309298,-0.640304240933579,1.31462478279425,-0.399783455165345,0.0453055645017902,-2.58212161987746,-1.16484414309359,-1.08829266466281,-0.243893919684792,-1.96655661929441,0.301335373291024,-0.665832694463588,-0.0120650855753837,1.5116066367604,0.557300353673344,1.12829931872045,0.234443748015922,-2.03486690662651,0.275544751380246,-0.231465849558696,-0.356880153225012,-0.57746647541923,1.35758352580655,1.23971669378224,-0.662466275100489,0.313263561921793,-1.08783223256362,1.41964722846899,1.29325100940785,0.72153880625103,0.440580131022748,0.0351917814720056, -0.142353224879252],:scale)
85
+ # x2=Statsample::Vector.new([-0.866655707911859,-0.367820249977585,0.361486610435,0.857332626245179,0.133438466268095,0.716104533073575,1.77206093023382,-0.10136697295802,-0.777086491435508,-0.204573554913706,0.963353531412233,-1.10103024900542,-0.404372761837392,-0.230226345183469,0.0363730246866971,-0.838265540390497,1.12543549657924,-0.57929175648001,-0.747060244805248,0.58946979365152,-0.531952663697324,1.53338594419818,0.521992029051441,1.41631763288724,0.611402316795129,-0.518355638373296,-0.515192557101107,-0.672697937866108,1.84347042325327,-0.21195540664804,-0.269869371631611,0.296155694010096,-2.18097898069634,-1.21314663927206,1.49193669881581,1.38969280369493,-0.400680808117106,-1.87282814976479,1.82394870451051,0.637864732838274,-0.141155946382493,0.0699950644281617,1.32568550595165,-0.412599258349398,0.14436832227506,-1.16507785388489,-2.16782049922428,0.24318371493798,0.258954871320764,-0.151966534521183],:scale)
86
+ # y=Statsample::Vector.new([0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1],:scale)
87
+ # x=Statsample::Dataset.new({"i"=>intercept,"x1"=>x1,"x2"=>x2})
88
+ # obj = Statsample::Regression.glm(x, y, :binomial)
89
+ # #=> Logistic Regression object
90
+         #   obj.irwls
91
+ # #=> Array of returned values
92
+ # obj.coefficients
93
+ # #=> named vector of coefficients
94
+
95
# Fits the model by delegating to Statsample::Regression.irwls, passing this
# class's mu, w, j and h as lambdas, and stores the seven results.
#
# Regression.irwls returns the iteration figure in fifth position and the
# residual degrees of freedom in sixth, so they are assigned to @iter and
# @df in that order.
#
# Returns the array produced by Statsample::Regression.irwls.
def irwls
  klass = self.class
  @coefficients, @se, @fit, @residuals, @iter, @df, @converged =
    Statsample::Regression.irwls(
      @x, @y,
      ->(x, b) { klass.mu(x, b) }, ->(x, b) { klass.w(x, b) },
      ->(x, b) { klass.j(x, b) }, ->(x, b, y) { klass.h(x, b, y) }
    )
end
104
+ end
105
+
106
+ end
107
+ end
108
+ end
@@ -0,0 +1,90 @@
1
+ module Statsample
2
+ module Regression
3
+ module GLM
4
+
5
+ class Poisson
6
+
7
+ attr_reader :se
8
+ # The fitted mean values
9
+ attr_reader :fit
10
+ # the _working_ residuals; that is the residuals in the final iteration of the IRWLS fit.
11
+ attr_reader :residuals
12
+ # The residuals degree of freedom
13
+ attr_reader :df
14
+ # Number of iterations used for convergence
15
+ attr_reader :iter
16
+ # Boolean. Tells whether the IRWLS for the given model converged or not
17
+ attr_reader :converged
18
+
19
# Keeps the dataset and response, and caches the dataset's field list and
# its matrix form (ds must respond to fields and to_matrix).
def initialize(ds, y)
  @ds, @y = ds, y
  @fields = ds.fields
  @x = ds.to_matrix
end
25
+
26
+ # named vector/hash of coefficients
27
+ # === Parameter
28
+ # * *type*: symbol; (:array, default). Options = [:array, :hash]
29
# Returns the stored coefficients.
# * type :array (default) -> the coefficients object exactly as stored
# * type :hash            -> Hash mapping each field name to its coefficient
# Any other type returns nil.
def coefficients(type=:array)
  case type
  when :array
    @coefficients
  when :hash
    named = {}
    @fields.each_with_index { |field, idx| named[field] = @coefficients[idx] }
    named
  end
end
40
+
41
# Mean function of the Poisson model, applied element-wise to x * b.
# link is lower-cased and converted to a symbol:
# * :log  -> exp of each entry
# * :sqrt -> square of each entry
# Any other link returns nil.
def self.mu(x, b, link=:log)
  case link.downcase.to_sym
  when :log
    (x * b).map { |eta| Math.exp(eta) }
  when :sqrt
    (x * b).map { |eta| eta**2 }
  end
end
48
+
49
# Weight matrix for the Poisson model: a diagonal matrix whose diagonal
# entries are the values of mu(x, b), in order.
def self.w(x, b)
  means = mu(x,b).column_vectors.map(&:to_a).flatten
  # Matrix.diagonal builds the matrix directly instead of walking every cell
  # of an identity matrix and substituting the ones.
  Matrix.diagonal(*means)
end
59
+
60
# Returns x' * (y - mu(x, b)) as a single-column matrix.
# Built with Matrix.column_vector, the same constructor the logistic model's
# h uses, so the two families read alike.
def self.h(x, b, y)
  fitted = mu(x,b).column_vectors.map(&:to_a).flatten
  differences = y.zip(fitted).map { |observed, expected| observed - expected }
  x.transpose * Matrix.column_vector(differences)
end
66
+
67
# Returns the element-wise negation of x' * w(x, b) * x.
def self.j(x, b)
  product = x.transpose * w(x, b) * x
  product.map(&:-@)
end
72
+
73
# Short human-readable label naming this model and its class.
def to_s
  "Poisson Regression (Statsample::Regression::GLM::Poisson)"
end
76
+
77
# Fits the model by delegating to Statsample::Regression.irwls, passing this
# class's mu, w, j and h as lambdas, and stores the seven results.
#
# Regression.irwls returns the iteration figure in fifth position and the
# residual degrees of freedom in sixth, so they are assigned to @iter and
# @df in that order.
#
# Returns the array produced by Statsample::Regression.irwls.
def irwls
  klass = self.class
  @coefficients, @se, @fit, @residuals, @iter, @df, @converged =
    Statsample::Regression.irwls(
      @x, @y,
      ->(x, b) { klass.mu(x, b) }, ->(x, b) { klass.w(x, b) },
      ->(x, b) { klass.j(x, b) }, ->(x, b, y) { klass.h(x, b, y) }
    )
end
86
+
87
+ end
88
+ end
89
+ end
90
+ end
@@ -0,0 +1,87 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ begin
4
+ Bundler.setup(:default, :development)
5
+ rescue Bundler::BundlerError => e
6
+ $stderr.puts e.message
7
+ $stderr.puts "Run `bundle install` to install missing gems"
8
+ exit e.status_code
9
+ end
10
+ require 'minitest/unit'
11
+ require 'shoulda'
12
+ require 'shoulda-context'
13
+ require 'mocha/setup'
14
+
15
+
16
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
17
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
18
+ require 'statsample-glm'
19
+ module MiniTest
20
+ class Unit
21
+ class TestCase
22
+ include Shoulda::Context::Assertions
23
+ include Shoulda::Context::InstanceMethods
24
+ extend Shoulda::Context::ClassMethods
25
+ def self.should_with_gsl(name,&block)
26
+ should(name) do
27
+ if Statsample.has_gsl?
28
+ instance_eval(&block)
29
+ else
30
+ skip("Requires GSL")
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
36
+
37
+ module Assertions
38
# Asserts that exp and obs have the same size and that every element of
# exp.data_with_nils is within delta of the element of obs at the same index.
# msg (or the default "Different vectors ..." text) is attached to every
# assertion so a failure reports it.
def assert_similar_vector(exp, obs, delta=1e-10,msg=nil)
  msg||="Different vectors #{exp} - #{obs}"
  assert_equal(exp.size, obs.size, msg)
  exp.data_with_nils.each_with_index do |value, index|
    assert_in_delta(value, obs[index], delta, msg)
  end
end
45
# Asserts that exp and obs have the same size and that, for every key of exp,
# the two values are within delta of each other.
# msg (or the default "Different hash ..." text) is attached to every
# assertion so a failure reports it.
def assert_similar_hash(exp, obs, delta=1e-10,msg=nil)
  msg||="Different hash #{exp} - #{obs}"
  assert_equal(exp.size, obs.size, msg)
  exp.each_key do |key|
    assert_in_delta(exp[key], obs[key], delta, msg)
  end
end
52
+
53
+ def assert_equal_vector(exp,obs,delta=1e-10,msg=nil)
54
+ assert_equal(exp.size, obs.size, "Different size.#{msg}")
55
+ exp.size.times {|i|
56
+ assert_in_delta(exp[i],obs[i],delta, "Different element #{i}. \nExpected:\n#{exp}\nObserved:\n#{obs}.#{msg}")
57
+ }
58
+ end
59
+ def assert_equal_matrix(exp,obs,delta=1e-10,msg=nil)
60
+ assert_equal(exp.row_size, obs.row_size, "Different row size.#{msg}")
61
+ assert_equal(exp.column_size, obs.column_size, "Different column size.#{msg}")
62
+ exp.row_size.times {|i|
63
+ exp.column_size.times {|j|
64
+ assert_in_delta(exp[i,j],obs[i,j], delta, "Different element #{i},#{j}\nExpected:\n#{exp}\nObserved:\n#{obs}.#{msg}")
65
+ }
66
+ }
67
+ end
68
+ alias :assert_raise :assert_raises unless method_defined? :assert_raise
69
+ alias :assert_not_equal :refute_equal unless method_defined? :assert_not_equal
70
+ alias :assert_not_same :refute_same unless method_defined? :assert_not_same
71
+ unless method_defined? :assert_nothing_raised
72
+ def assert_nothing_raised(msg=nil)
73
+ msg||="Nothing should be raised, but raised %s"
74
+ begin
75
+ yield
76
+ not_raised=true
77
+ rescue Exception => e
78
+ not_raised=false
79
+ msg=sprintf(msg,e)
80
+ end
81
+ assert(not_raised,msg)
82
+ end
83
+ end
84
+ end
85
+ end
86
+
87
+ MiniTest::Unit.autorun
@@ -0,0 +1,4 @@
1
+ require(File.expand_path(File.dirname(__FILE__)+'/helper.rb'))
2
+
3
+ class StatsampleRegressionGlm < MiniTest::Unit::TestCase
4
+ end
@@ -0,0 +1,23 @@
1
+ require(File.expand_path(File.dirname(__FILE__)+'/helper.rb'))
2
+
3
+ class StatsampleRegressionGlmLogistic < MiniTest::Unit::TestCase
4
+
5
+ context("Example") do
6
+ setup do
7
+ x1=Statsample::Vector.new([0.537322309644812,-0.717124209978434,-0.519166718891331,0.434970973986765,-0.761822002215759,1.51170030921189,0.883854199811195,-0.908689798854196,1.70331977539793,-0.246971150634099,-1.59077593922623,-0.721548040910253,0.467025703920194,-0.510132788447137,0.430106510266798,-0.144353683251536,-1.54943800728303,0.849307651309298,-0.640304240933579,1.31462478279425,-0.399783455165345,0.0453055645017902,-2.58212161987746,-1.16484414309359,-1.08829266466281,-0.243893919684792,-1.96655661929441,0.301335373291024,-0.665832694463588,-0.0120650855753837,1.5116066367604,0.557300353673344,1.12829931872045,0.234443748015922,-2.03486690662651,0.275544751380246,-0.231465849558696,-0.356880153225012,-0.57746647541923,1.35758352580655,1.23971669378224,-0.662466275100489,0.313263561921793,-1.08783223256362,1.41964722846899,1.29325100940785,0.72153880625103,0.440580131022748,0.0351917814720056, -0.142353224879252],:scale)
8
+ x2=Statsample::Vector.new([-0.866655707911859,-0.367820249977585,0.361486610435,0.857332626245179,0.133438466268095,0.716104533073575,1.77206093023382,-0.10136697295802,-0.777086491435508,-0.204573554913706,0.963353531412233,-1.10103024900542,-0.404372761837392,-0.230226345183469,0.0363730246866971,-0.838265540390497,1.12543549657924,-0.57929175648001,-0.747060244805248,0.58946979365152,-0.531952663697324,1.53338594419818,0.521992029051441,1.41631763288724,0.611402316795129,-0.518355638373296,-0.515192557101107,-0.672697937866108,1.84347042325327,-0.21195540664804,-0.269869371631611,0.296155694010096,-2.18097898069634,-1.21314663927206,1.49193669881581,1.38969280369493,-0.400680808117106,-1.87282814976479,1.82394870451051,0.637864732838274,-0.141155946382493,0.0699950644281617,1.32568550595165,-0.412599258349398,0.14436832227506,-1.16507785388489,-2.16782049922428,0.24318371493798,0.258954871320764,-0.151966534521183],:scale)
9
+ @y_log=Statsample::Vector.new([0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1],:scale)
10
+ @y_pois=Statsample::Vector.new([1,2,1,3,3,1,10,1,1,2,15,0,0,2,1,2,18,2,1,1,1,8,18,13,7,1,1,0,26,0,2,2,0,0,25,7,0,0,21,0,0,1,5,0,3,0,0,1,0,0],:scale)
11
+ intercept=Statsample::Vector.new([1]*50,:scale)
12
+ @df=Statsample::Dataset.new({"i"=>intercept,"x1"=>x1,"x2"=>x2})
13
+ @glm=Statsample::Regression.glm(@df,@y_log,:binomial)
14
+ end
15
+ should "report correct coefficientes as array" do
16
+ assert_similar_vector(@glm.coefficients,[0.675603176233325,-0.312493754568903,2.28671333346264])
17
+ end
18
+ should "report correct coefficientes as hash" do
19
+ assert_similar_hash(@glm.coefficients(:hash), {"i"=>0.675603176233325,"x1"=>-0.312493754568903,"x2"=>2.28671333346264})
20
+ end
21
+
22
+ end
23
+ end
@@ -0,0 +1,25 @@
1
+ require(File.expand_path(File.dirname(__FILE__)+'/helper.rb'))
2
+
3
+ class StatsampleRegressionGlmPoisson < MiniTest::Unit::TestCase
4
+
5
+ context("Example") do
6
+ setup do
7
+ x1=Statsample::Vector.new([0.537322309644812,-0.717124209978434,-0.519166718891331,0.434970973986765,-0.761822002215759,1.51170030921189,0.883854199811195,-0.908689798854196,1.70331977539793,-0.246971150634099,-1.59077593922623,-0.721548040910253,0.467025703920194,-0.510132788447137,0.430106510266798,-0.144353683251536,-1.54943800728303,0.849307651309298,-0.640304240933579,1.31462478279425,-0.399783455165345,0.0453055645017902,-2.58212161987746,-1.16484414309359,-1.08829266466281,-0.243893919684792,-1.96655661929441,0.301335373291024,-0.665832694463588,-0.0120650855753837,1.5116066367604,0.557300353673344,1.12829931872045,0.234443748015922,-2.03486690662651,0.275544751380246,-0.231465849558696,-0.356880153225012,-0.57746647541923,1.35758352580655,1.23971669378224,-0.662466275100489,0.313263561921793,-1.08783223256362,1.41964722846899,1.29325100940785,0.72153880625103,0.440580131022748,0.0351917814720056, -0.142353224879252],:scale)
8
+ x2=Statsample::Vector.new([-0.866655707911859,-0.367820249977585,0.361486610435,0.857332626245179,0.133438466268095,0.716104533073575,1.77206093023382,-0.10136697295802,-0.777086491435508,-0.204573554913706,0.963353531412233,-1.10103024900542,-0.404372761837392,-0.230226345183469,0.0363730246866971,-0.838265540390497,1.12543549657924,-0.57929175648001,-0.747060244805248,0.58946979365152,-0.531952663697324,1.53338594419818,0.521992029051441,1.41631763288724,0.611402316795129,-0.518355638373296,-0.515192557101107,-0.672697937866108,1.84347042325327,-0.21195540664804,-0.269869371631611,0.296155694010096,-2.18097898069634,-1.21314663927206,1.49193669881581,1.38969280369493,-0.400680808117106,-1.87282814976479,1.82394870451051,0.637864732838274,-0.141155946382493,0.0699950644281617,1.32568550595165,-0.412599258349398,0.14436832227506,-1.16507785388489,-2.16782049922428,0.24318371493798,0.258954871320764,-0.151966534521183],:scale)
9
+ @y_log=Statsample::Vector.new([0,0,1,0,1,1,1,1,0,1,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,0,1,1,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,1,0,0,0,1,1],:scale)
10
+ @y_pois=Statsample::Vector.new([1,2,1,3,3,1,10,1,1,2,15,0,0,2,1,2,18,2,1,1,1,8,18,13,7,1,1,0,26,0,2,2,0,0,25,7,0,0,21,0,0,1,5,0,3,0,0,1,0,0],:scale)
11
+ intercept=Statsample::Vector.new([1]*50,:scale)
12
+ @df=Statsample::Dataset.new({"i"=>intercept,"x1"=>x1,"x2"=>x2})
13
+ @glm=Statsample::Regression.glm(@df,@y_pois,:poisson)
14
+
15
+ end
16
+ should "report correct coefficientes as array" do
17
+ assert_similar_vector(@glm.coefficients,[0.32993246633711,-0.586359358356708,1.28511323439258])
18
+ end # should
19
+ should "report correct coefficientes as hash" do
20
+ assert_similar_hash(@glm.coefficients(:hash), {"i"=>0.32993246633711,"x1"=>-0.586359358356708, "x2"=>1.28511323439258})
21
+ end # should
22
+
23
+ end # context
24
+ end # class
25
+
metadata ADDED
@@ -0,0 +1,246 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: statsample-glm
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Ankur Goel
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-09-25 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: statsample
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 1.2.0
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 1.2.0
30
+ - !ruby/object:Gem::Dependency
31
+ name: activesupport
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - '='
36
+ - !ruby/object:Gem::Version
37
+ version: 3.2.10
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - '='
44
+ - !ruby/object:Gem::Version
45
+ version: 3.2.10
46
+ - !ruby/object:Gem::Dependency
47
+ name: shoulda
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: rdoc
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ~>
68
+ - !ruby/object:Gem::Version
69
+ version: '3.12'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ~>
76
+ - !ruby/object:Gem::Version
77
+ version: '3.12'
78
+ - !ruby/object:Gem::Dependency
79
+ name: minitest
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ~>
84
+ - !ruby/object:Gem::Version
85
+ version: 4.7.5
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ~>
92
+ - !ruby/object:Gem::Version
93
+ version: 4.7.5
94
+ - !ruby/object:Gem::Dependency
95
+ name: cucumber
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ type: :development
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ - !ruby/object:Gem::Dependency
111
+ name: bundler
112
+ requirement: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ~>
116
+ - !ruby/object:Gem::Version
117
+ version: 1.3.5
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ~>
124
+ - !ruby/object:Gem::Version
125
+ version: 1.3.5
126
+ - !ruby/object:Gem::Dependency
127
+ name: jeweler
128
+ requirement: !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ~>
132
+ - !ruby/object:Gem::Version
133
+ version: 1.8.4
134
+ type: :development
135
+ prerelease: false
136
+ version_requirements: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ~>
140
+ - !ruby/object:Gem::Version
141
+ version: 1.8.4
142
+ - !ruby/object:Gem::Dependency
143
+ name: bio
144
+ requirement: !ruby/object:Gem::Requirement
145
+ none: false
146
+ requirements:
147
+ - - ! '>='
148
+ - !ruby/object:Gem::Version
149
+ version: 1.4.2
150
+ type: :development
151
+ prerelease: false
152
+ version_requirements: !ruby/object:Gem::Requirement
153
+ none: false
154
+ requirements:
155
+ - - ! '>='
156
+ - !ruby/object:Gem::Version
157
+ version: 1.4.2
158
+ - !ruby/object:Gem::Dependency
159
+ name: rdoc
160
+ requirement: !ruby/object:Gem::Requirement
161
+ none: false
162
+ requirements:
163
+ - - ~>
164
+ - !ruby/object:Gem::Version
165
+ version: '3.12'
166
+ type: :development
167
+ prerelease: false
168
+ version_requirements: !ruby/object:Gem::Requirement
169
+ none: false
170
+ requirements:
171
+ - - ~>
172
+ - !ruby/object:Gem::Version
173
+ version: '3.12'
174
+ - !ruby/object:Gem::Dependency
175
+ name: mocha
176
+ requirement: !ruby/object:Gem::Requirement
177
+ none: false
178
+ requirements:
179
+ - - ~>
180
+ - !ruby/object:Gem::Version
181
+ version: 0.14.0
182
+ type: :development
183
+ prerelease: false
184
+ version_requirements: !ruby/object:Gem::Requirement
185
+ none: false
186
+ requirements:
187
+ - - ~>
188
+ - !ruby/object:Gem::Version
189
+ version: 0.14.0
190
+ description: Statsample-GLM is an extension to Statsample, an advance statistics suite
191
+ in Ruby. This gem includes modules for Regression techniques such as Poisson Regression,
192
+ Logistic Regression and Exponential Regression
193
+ email: ankurgel@gmail.com
194
+ executables: []
195
+ extensions: []
196
+ extra_rdoc_files:
197
+ - LICENSE.txt
198
+ - README.rdoc
199
+ files:
200
+ - .document
201
+ - .travis.yml
202
+ - Gemfile
203
+ - LICENSE.txt
204
+ - README.rdoc
205
+ - Rakefile
206
+ - VERSION
207
+ - features/bio-statsample-glm.feature
208
+ - features/step_definitions/bio-statsample-glm_steps.rb
209
+ - features/support/env.rb
210
+ - lib/statsample-glm.rb
211
+ - lib/statsample-glm/regression.rb
212
+ - lib/statsample-glm/regression/logistic.rb
213
+ - lib/statsample-glm/regression/poisson.rb
214
+ - test/helper.rb
215
+ - test/test_glm.rb
216
+ - test/test_glm_logistic.rb
217
+ - test/test_glm_poisson.rb
218
+ homepage: http://github.com/AnkurGel/statsample-glm
219
+ licenses:
220
+ - MIT
221
+ post_install_message:
222
+ rdoc_options: []
223
+ require_paths:
224
+ - lib
225
+ required_ruby_version: !ruby/object:Gem::Requirement
226
+ none: false
227
+ requirements:
228
+ - - ! '>='
229
+ - !ruby/object:Gem::Version
230
+ version: '0'
231
+ segments:
232
+ - 0
233
+ hash: 891920669
234
+ required_rubygems_version: !ruby/object:Gem::Requirement
235
+ none: false
236
+ requirements:
237
+ - - ! '>='
238
+ - !ruby/object:Gem::Version
239
+ version: '0'
240
+ requirements: []
241
+ rubyforge_project:
242
+ rubygems_version: 1.8.25
243
+ signing_key:
244
+ specification_version: 3
245
+ summary: Generalized Linear Models for Statsample
246
+ test_files: []