RubyGems - glm - Versions diffs - 0.0.0 - Mend

glm 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

data/data/iris.data ADDED

@@ -0,0 +1,151 @@
+5.1,3.5,1.4,0.2,Iris-setosa
+4.9,3.0,1.4,0.2,Iris-setosa
+4.7,3.2,1.3,0.2,Iris-setosa
+4.6,3.1,1.5,0.2,Iris-setosa
+5.0,3.6,1.4,0.2,Iris-setosa
+5.4,3.9,1.7,0.4,Iris-setosa
+4.6,3.4,1.4,0.3,Iris-setosa
+5.0,3.4,1.5,0.2,Iris-setosa
+4.4,2.9,1.4,0.2,Iris-setosa
+4.9,3.1,1.5,0.1,Iris-setosa
+5.4,3.7,1.5,0.2,Iris-setosa
+4.8,3.4,1.6,0.2,Iris-setosa
+4.8,3.0,1.4,0.1,Iris-setosa
+4.3,3.0,1.1,0.1,Iris-setosa
+5.8,4.0,1.2,0.2,Iris-setosa
+5.7,4.4,1.5,0.4,Iris-setosa
+5.4,3.9,1.3,0.4,Iris-setosa
+5.1,3.5,1.4,0.3,Iris-setosa
+5.7,3.8,1.7,0.3,Iris-setosa
+5.1,3.8,1.5,0.3,Iris-setosa
+5.4,3.4,1.7,0.2,Iris-setosa
+5.1,3.7,1.5,0.4,Iris-setosa
+4.6,3.6,1.0,0.2,Iris-setosa
+5.1,3.3,1.7,0.5,Iris-setosa
+4.8,3.4,1.9,0.2,Iris-setosa
+5.0,3.0,1.6,0.2,Iris-setosa
+5.0,3.4,1.6,0.4,Iris-setosa
+5.2,3.5,1.5,0.2,Iris-setosa
+5.2,3.4,1.4,0.2,Iris-setosa
+4.7,3.2,1.6,0.2,Iris-setosa
+4.8,3.1,1.6,0.2,Iris-setosa
+5.4,3.4,1.5,0.4,Iris-setosa
+5.2,4.1,1.5,0.1,Iris-setosa
+5.5,4.2,1.4,0.2,Iris-setosa
+4.9,3.1,1.5,0.1,Iris-setosa
+5.0,3.2,1.2,0.2,Iris-setosa
+5.5,3.5,1.3,0.2,Iris-setosa
+4.9,3.1,1.5,0.1,Iris-setosa
+4.4,3.0,1.3,0.2,Iris-setosa
+5.1,3.4,1.5,0.2,Iris-setosa
+5.0,3.5,1.3,0.3,Iris-setosa
+4.5,2.3,1.3,0.3,Iris-setosa
+4.4,3.2,1.3,0.2,Iris-setosa
+5.0,3.5,1.6,0.6,Iris-setosa
+5.1,3.8,1.9,0.4,Iris-setosa
+4.8,3.0,1.4,0.3,Iris-setosa
+5.1,3.8,1.6,0.2,Iris-setosa
+4.6,3.2,1.4,0.2,Iris-setosa
+5.3,3.7,1.5,0.2,Iris-setosa
+5.0,3.3,1.4,0.2,Iris-setosa
+7.0,3.2,4.7,1.4,Iris-versicolor
+6.4,3.2,4.5,1.5,Iris-versicolor
+6.9,3.1,4.9,1.5,Iris-versicolor
+5.5,2.3,4.0,1.3,Iris-versicolor
+6.5,2.8,4.6,1.5,Iris-versicolor
+5.7,2.8,4.5,1.3,Iris-versicolor
+6.3,3.3,4.7,1.6,Iris-versicolor
+4.9,2.4,3.3,1.0,Iris-versicolor
+6.6,2.9,4.6,1.3,Iris-versicolor
+5.2,2.7,3.9,1.4,Iris-versicolor
+5.0,2.0,3.5,1.0,Iris-versicolor
+5.9,3.0,4.2,1.5,Iris-versicolor
+6.0,2.2,4.0,1.0,Iris-versicolor
+6.1,2.9,4.7,1.4,Iris-versicolor
+5.6,2.9,3.6,1.3,Iris-versicolor
+6.7,3.1,4.4,1.4,Iris-versicolor
+5.6,3.0,4.5,1.5,Iris-versicolor
+5.8,2.7,4.1,1.0,Iris-versicolor
+6.2,2.2,4.5,1.5,Iris-versicolor
+5.6,2.5,3.9,1.1,Iris-versicolor
+5.9,3.2,4.8,1.8,Iris-versicolor
+6.1,2.8,4.0,1.3,Iris-versicolor
+6.3,2.5,4.9,1.5,Iris-versicolor
+6.1,2.8,4.7,1.2,Iris-versicolor
+6.4,2.9,4.3,1.3,Iris-versicolor
+6.6,3.0,4.4,1.4,Iris-versicolor
+6.8,2.8,4.8,1.4,Iris-versicolor
+6.7,3.0,5.0,1.7,Iris-versicolor
+6.0,2.9,4.5,1.5,Iris-versicolor
+5.7,2.6,3.5,1.0,Iris-versicolor
+5.5,2.4,3.8,1.1,Iris-versicolor
+5.5,2.4,3.7,1.0,Iris-versicolor
+5.8,2.7,3.9,1.2,Iris-versicolor
+6.0,2.7,5.1,1.6,Iris-versicolor
+5.4,3.0,4.5,1.5,Iris-versicolor
+6.0,3.4,4.5,1.6,Iris-versicolor
+6.7,3.1,4.7,1.5,Iris-versicolor
+6.3,2.3,4.4,1.3,Iris-versicolor
+5.6,3.0,4.1,1.3,Iris-versicolor
+5.5,2.5,4.0,1.3,Iris-versicolor
+5.5,2.6,4.4,1.2,Iris-versicolor
+6.1,3.0,4.6,1.4,Iris-versicolor
+5.8,2.6,4.0,1.2,Iris-versicolor
+5.0,2.3,3.3,1.0,Iris-versicolor
+5.6,2.7,4.2,1.3,Iris-versicolor
+5.7,3.0,4.2,1.2,Iris-versicolor
+5.7,2.9,4.2,1.3,Iris-versicolor
+6.2,2.9,4.3,1.3,Iris-versicolor
+5.1,2.5,3.0,1.1,Iris-versicolor
+5.7,2.8,4.1,1.3,Iris-versicolor
+6.3,3.3,6.0,2.5,Iris-virginica
+5.8,2.7,5.1,1.9,Iris-virginica
+7.1,3.0,5.9,2.1,Iris-virginica
+6.3,2.9,5.6,1.8,Iris-virginica
+6.5,3.0,5.8,2.2,Iris-virginica
+7.6,3.0,6.6,2.1,Iris-virginica
+4.9,2.5,4.5,1.7,Iris-virginica
+7.3,2.9,6.3,1.8,Iris-virginica
+6.7,2.5,5.8,1.8,Iris-virginica
+7.2,3.6,6.1,2.5,Iris-virginica
+6.5,3.2,5.1,2.0,Iris-virginica
+6.4,2.7,5.3,1.9,Iris-virginica
+6.8,3.0,5.5,2.1,Iris-virginica
+5.7,2.5,5.0,2.0,Iris-virginica
+5.8,2.8,5.1,2.4,Iris-virginica
+6.4,3.2,5.3,2.3,Iris-virginica
+6.5,3.0,5.5,1.8,Iris-virginica
+7.7,3.8,6.7,2.2,Iris-virginica
+7.7,2.6,6.9,2.3,Iris-virginica
+6.0,2.2,5.0,1.5,Iris-virginica
+6.9,3.2,5.7,2.3,Iris-virginica
+5.6,2.8,4.9,2.0,Iris-virginica
+7.7,2.8,6.7,2.0,Iris-virginica
+6.3,2.7,4.9,1.8,Iris-virginica
+6.7,3.3,5.7,2.1,Iris-virginica
+7.2,3.2,6.0,1.8,Iris-virginica
+6.2,2.8,4.8,1.8,Iris-virginica
+6.1,3.0,4.9,1.8,Iris-virginica
+6.4,2.8,5.6,2.1,Iris-virginica
+7.2,3.0,5.8,1.6,Iris-virginica
+7.4,2.8,6.1,1.9,Iris-virginica
+7.9,3.8,6.4,2.0,Iris-virginica
+6.4,2.8,5.6,2.2,Iris-virginica
+6.3,2.8,5.1,1.5,Iris-virginica
+6.1,2.6,5.6,1.4,Iris-virginica
+7.7,3.0,6.1,2.3,Iris-virginica
+6.3,3.4,5.6,2.4,Iris-virginica
+6.4,3.1,5.5,1.8,Iris-virginica
+6.0,3.0,4.8,1.8,Iris-virginica
+6.9,3.1,5.4,2.1,Iris-virginica
+6.7,3.1,5.6,2.4,Iris-virginica
+6.9,3.1,5.1,2.3,Iris-virginica
+5.8,2.7,5.1,1.9,Iris-virginica
+6.8,3.2,5.9,2.3,Iris-virginica
+6.7,3.3,5.7,2.5,Iris-virginica
+6.7,3.0,5.2,2.3,Iris-virginica
+6.3,2.5,5.0,1.9,Iris-virginica
+6.5,3.0,5.2,2.0,Iris-virginica
+6.2,3.4,5.4,2.3,Iris-virginica
+5.9,3.0,5.1,1.8,Iris-virginica

data/examples/prepare.rb ADDED

@@ -0,0 +1,14 @@
+  # Loads data/iris.data (path relative to the current directory) as a 2-d array: one row per sample, four float-valued features followed by a float target (1.0 for Iris-setosa, 0.0 otherwise); Iris-virginica rows and blank lines are dropped.
+  def load_iris
+    ifile = File.open(  # NOTE(review): the handle is not closed if reading/parsing raises (no block form or ensure)
+                     File.join("data","iris.data"))
+    iris = ((ifile.readlines.map {|l|
+               fields = l.chomp.split(pattern=",")  # `pattern=` only assigns an otherwise unused local; the argument passed is ","
+               fields
+             }).select {|fields|
+              fields[4] != "Iris-virginica" && fields != []}).map {|fields|  # keep the non-virginica rows and skip empty lines
+      fields[4] = (fields[4] == "Iris-setosa") ? 1 : 0  # binary target; turned into a float by the to_f on the next line
+      fields.map{|f| f.to_f}}
+    ifile.close
+    return iris
+  end

data/examples/test_glm_linear.rb ADDED

@@ -0,0 +1,28 @@
+require 'glm'
+require 'pp'
+require 'ruby-debug' ; Debugger.start(:post_mortem => true)
+Debugger.settings[:autoeval] = true
+require 'prepare'
+T = 2000  # number of full passes over the training rows
+iris = load_iris
+y_arr = iris.map {|r| r[0]}  # regression target: column 0 of each row
+x = iris.map {|r| r[1...-1]}  # predictors: every column after the first except the last (class) column
+x = x.map{|r| r << 1}  # append a constant 1 to each row (presumably the intercept term)
+x,y = GLM::Util.formatArrays(x, y_arr)  # x becomes a Matrix, y a one-column Matrix
+linear = GLM::Linear.new(x, y)  # no alpha given, so the constructor default applies
+(1..T).each do |i|
+  linear.sto_update  # one stochastic-gradient pass over all rows
+  est_y =  linear.est(x)
+  err = (y.zip(est_y).map{|e| (e[0]-e[1])**2}).reduce(:+)/y.row_size  # mean squared error over the training rows
+  puts err
+end
+#end
+#puts ((y_est - y).map {|e| e ** 2}).reduce(:+)/y.row_size
+#    puts "Theta: #{y_est.to_a.to_s}"

data/examples/test_glm_logit.rb ADDED

@@ -0,0 +1,26 @@
+require 'glm'
+require 'pp'
+require 'ruby-debug' ; Debugger.start(:post_mortem => true)
+Debugger.settings[:autoeval] = true
+require 'prepare'
+T = 200  # number of full passes over the training rows
+iris = load_iris
+y_arr = iris.map {|r| r[-1].to_i}  # class label from the last column, as an Integer
+x = iris.map {|r| r[0...-2]}  # NOTE(review): `0...-2` drops the last TWO columns, i.e. the label and the feature before it - confirm that is intended
+x = x.map{|r| r << 1}  # append a constant 1 to each row (presumably the intercept term)
+x,y = GLM::Util.formatArrays(x, y_arr)  # x becomes a Matrix, y a one-column Matrix
+glm = GLM::Logit.new(x, y, 0.1)  # third argument is the step size alpha
+(1..T).each do |i|
+  glm.sto_update  # one stochastic-gradient pass over all rows
+  est_y =  glm.est(x)
+  err = (y.zip(est_y).map{|e| (e[0]!=e[1])?1:0}).reduce(:+)# number of misclassified rows; the division by y.row_size is disabled, so this is a count, not a rate
+#  pp y.zip(est_y)
+  puts err
+  pp glm.theta
+end

data/lib/glm.rb ADDED

@@ -0,0 +1,20 @@
+require 'matrix'
+include Math
+# Top-level class of the Generalized Linear Models library.
+# It serves as the namespace for the classes loaded by the requires that follow it (glm/logit, glm/linear, glm/util).
+#
+class GLM
+  # Placeholder: the body is empty, so this currently returns nil. (The original comment, "Vectorize a function intended for", was left unfinished.)
+  def self.vectorize()
+  end
+end
+require 'glm/logit'
+require 'glm/linear'
+require 'glm/util'

data/lib/glm/base.rb ADDED

@@ -0,0 +1,103 @@
+class GLM::Base  # Shared machinery for the GLM models: stores the data and theta and runs stochastic gradient updates; subclasses supply self.g
+  def initialize(x,y,alpha = 0.1)  # x: Matrix with one sample per row; y: one-column Matrix of targets; alpha: step size
+    @x = x
+    @y = y
+    @@alpha = alpha  # NOTE(review): class variable, so it is shared by every GLM::Base instance and subclass; the most recently constructed model sets the step size for all of them
+    @theta = Array.new(x.column_size,1)  # one coefficient per column of x, all initialised to 1
+  end
+  # Log partition function <b>a(eta)</b>, intended to be overridden; this base version always raises
+  def a
+    raise 'Log partition function a(eta) undefined'
+  end
+  # Intended to be overridden; this base version always raises
+  def b
+    raise 'b undefined'
+  end
+  def format(x)  # Applies output to x: once per row for an Array of Arrays or a Matrix, once in total for a flat Array
+    if x.is_a? Array
+      if x[0].is_a?(Array)
+        x.map {|e|
+          output(e)}  # NOTE(review): output calls x.t, which core Array does not define - the Array branches look like they raise NoMethodError; confirm
+      else
+        output(x)
+      end
+    # Anything that is not an Array is assumed to be a Matrix
+    else
+      x.row_vectors.map {|e|
+        output(Matrix.row_vector(e))  # wrap each row Vector as a 1 x n Matrix so that output can transpose it
+      }
+    end
+  end
+  # Estimator; delegates to format
+  # =Arguments:
+  #   x: a flat Array, an Array of Arrays, or a Matrix with one sample per row
+  # =Returns:
+  #   The result of output for a flat Array, otherwise an Array with one output per row
+  def est(x)
+    format(x)
+  end
+  # Output estimation from E(y|theta,x): transposes x and passes it to h
+  # Needs overriding, except for plain linear regression
+  def output(x)
+    return h(x.t)
+  end
+  # Natural parameter eta: the scalar theta^T * x, where x is a column Matrix
+  def eta(x)
+    tmp = (Matrix.column_vector(@theta).t * x)[0,0]  # the product is 1 x 1; [0,0] extracts the scalar
+    return tmp
+  end
+  # Sufficient statistic <b>T</b>; returns the stored targets @y
+  def T
+    return @y
+  end
+  # Canonical response function, intended to be overridden; this base version always raises
+  def self.g(eta)
+    raise 'Canonical reponse function g(eta) undefined'
+  end
+  # Gradient on one sample: (y - h(v)) * x, where x is one feature value, y the target and v the sample as a column Matrix
+  def gradient(x,y,v)
+    tmp = h(v)
+    return (y - tmp) * x
+  end
+  # Hypothesis function, outputs E(y|theta, x), mean of y given x parameterized by theta
+  # =Parameters:
+  #   x: a feature vector (passed straight to eta, so a column Matrix)
+  # =Returns:
+  #   E(y|theta, x), computed as the class-level g applied to eta(x)
+  def h(x)
+    tmp = eta(x)
+    return self.class.g(tmp)  # dispatches to the subclass's self.g
+  end
+  # A step based on one sample in stochastic gradient descent (not implemented: the body is empty)
+  def single_update()
+  end
+  # One complete pass of stochastic gradient descent over every row of @x
+  def sto_update()
+    (0...(@x.row_size)).each do |i|
+      (0...(@x.column_size)).each do |j|
+        @theta[j] += @@alpha * gradient(@x[i,j], @y[i,0], Matrix.column_vector(@x.row(i)))  # NOTE(review): h is re-evaluated for every j, so later coefficients see earlier ones already updated for this same sample - confirm intended
+      end
+    end
+  end
+  def theta()  # Current coefficient Array
+    return @theta
+  end
+end

data/lib/glm/linear.rb ADDED

@@ -0,0 +1,27 @@
+require 'glm/base'
+class GLM::Linear < GLM::Base  # Linear regression: identity response function plus a closed-form normal-equation fit
+  def hi  # Returns a greeting string containing the class name
+    return "Hi, this is #{self.class}"
+  end
+  # Canonical response function
+  # It is the identity function here: eta is returned unchanged
+  def self.g(eta)
+    return eta
+  end
+  def ne_est(x)  # Replaces @theta with the normal-equation fit and returns [theta, x * theta]; x must support * with a column Matrix
+    @theta = ne_fit
+    [@theta, x * Matrix.column_vector(@theta)]
+  end
+  # Normal equation fit: (X^T X)^-1 X^T y computed from the stored @x and @y, flattened to a plain Array
+  def ne_fit
+    (( @x.t * @x ).inverse * @x.t * @y).to_a.flatten  # NOTE(review): no handling here for a singular X^T X - presumably inverse raises in that case; confirm
+  end
+end

data/lib/glm/logit.rb ADDED

@@ -0,0 +1,62 @@
+require 'glm/base'
+class GLM::Logit < GLM::Base  # Logistic regression: sigmoid response function and a 0/1 output thresholded at 0.5
+  def a  # Returns -log(1 - phi)
+    return -Math.log(1-phi)
+  end
+  def b  # Always returns 1
+    return 1
+  end
+  # Canonical response function: the logistic sigmoid of eta
+  def self.g(eta)
+    self.sigmoid(eta)
+  end
+  def output(x)  # Predicted class: 1 when h(x.t) is strictly greater than 0.5, otherwise 0
+    return (h(x.t) > 0.5)?1:0
+  end
+  def phi
+    return h(x)  # NOTE(review): `x` is neither a parameter nor a method defined in this class or in GLM::Base, so this looks like it raises NameError - confirm
+  end
+  # Logistic function on vectors, parameterized by theta
+  # Arguments:
+  #   theta: An array of coefficients
+  def self.sigmoid_vec(theta)
+    # Returns a closure which takes
+    # Arguments:
+    #   x: the values of one row, in a form Matrix.row_vector accepts
+    return lambda {|x|
+      sigmoid(
+            (Matrix.row_vector(x) * Matrix.row_vector(theta).t).tr)}  # the product is 1 x 1, so its trace is the dot product of x and theta
+  end
+  # Logistic function
+  # Arguments:
+  #   x: scalar
+  def self.sigmoid(x)
+    return 1/(1 + exp(-x))  # bare `exp` is not defined in this file; it relies on Math being mixed in elsewhere (glm.rb does a top-level `include Math`)
+  end
+  # Derivative of Logistic function: sigmoid(x) * (1 - sigmoid(x))
+  # Arguments:
+  #   x: scalar
+  def self.deriv_sigmoid( x )
+    return sigmoid( x ) * ( 1 - sigmoid( x ) )
+  end
+  def self.logit(z)  # Log-odds log(z / (1 - z)); NOTE(review): for an Integer z the division is integer division - callers presumably pass a Float, confirm
+    Math.log(z/(1-z))
+  end
+  def self.truth  # Returns a fixed string; the unit test's sanity check asserts on it
+    "Sanity is for the weak!"
+  end
+end

data/lib/glm/util.rb ADDED

@@ -0,0 +1,9 @@
+require 'matrix'
+class GLM::Util  # Helper namespace; contains only a class-level conversion method
+  def self.formatArrays(x, y)  # x: Array of row Arrays, y: flat Array; returns [Matrix built from the rows of x, one-column Matrix built from y]
+    x = Matrix.rows(x)
+    y = Matrix.column_vector(y)
+    return [x, y]
+  end
+end

data/test/test_glm.rb ADDED

@@ -0,0 +1,41 @@
+require 'test/unit'
+require 'glm'
+require 'pp'
+require 'ruby-debug' ; Debugger.start(:post_mortem => true)
+class GLMTest < Test::Unit::TestCase  # Unit tests: a sanity check on GLM::Logit.truth and a normal-equation linear fit on the iris data
+  def test_glm_logit_truth  # Checks that the library loads and the class method returns its fixed string
+    assert_equal "Sanity is for the weak!",
+    GLM::Logit.truth
+  end
+  # Helper method: loads data/iris.data (relative to the current directory), drops Iris-virginica rows and blank lines, and returns rows of five floats whose last value is 1.0 for Iris-setosa and 0.0 otherwise
+  def load_iris
+    ifile = File.open(  # NOTE(review): the handle is not closed if reading raises or the assertion below fails (no block form or ensure)
+                     File.join("data","iris.data"))
+    iris = ((ifile.readlines.map {|l|
+               fields = l.chomp.split(pattern=",")  # `pattern=` only assigns an otherwise unused local; the argument passed is ","
+               fields
+             }).select {|fields|
+              fields[4] != "Iris-virginica" && fields != []}).map {|fields|  # keep the non-virginica rows and skip empty lines
+      fields[4] = (fields[4] == "Iris-setosa") ? 1 : 0  # binary target; turned into a float by the to_f on the next line
+      fields.map{|f| f.to_f}}
+    assert_equal 100, iris.length  # expects exactly 100 rows to remain after filtering
+    ifile.close
+    return iris
+  end
+  def test_linear  # Fits column 0 from the middle columns plus a constant via the normal equation and requires a mean squared error below 0.1
+    iris = load_iris
+    y = iris.map {|r| r[0]}  # regression target: column 0 of each row
+    x = iris.map {|r| r[1...-1]}  # predictors: every column after the first except the last (class) column
+    x = x.map{|r| r << 1}  # append a constant 1 to each row (presumably the intercept term)
+    x,y = GLM::Util.formatArrays(x, y)
+    linear = GLM::Linear.new(x, y)
+    theta, y_est = linear.ne_est(x)
+    assert  ((y_est - y).map {|e| e ** 2}).reduce(:+)/y.row_size < 0.1  # mean of the squared residuals over the training rows
+  end
+end

metadata ADDED

@@ -0,0 +1,54 @@
+--- !ruby/object:Gem::Specification
+name: glm
+version: !ruby/object:Gem::Version
+  version: 0.0.0
+  prerelease:
+platform: ruby
+authors:
+- Yu Shen
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2012-03-19 00:00:00.000000000Z
+dependencies: []
+description: Generalized Linear Method
+email: yushen83@gmail.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- lib/glm.rb
+- lib/glm/base.rb
+- lib/glm/linear.rb
+- lib/glm/logit.rb
+- lib/glm/util.rb
+- examples/prepare.rb
+- examples/test_glm_logit.rb
+- examples/test_glm_linear.rb
+- test/test_glm.rb
+- data/iris.data
+homepage: https://github.com/yushen
+licenses: []
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 1.8.19
+signing_key:
+specification_version: 3
+summary: This is a W.I.P implementation of GLM
+test_files: []