glm 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -8,16 +8,17 @@ T = 2000
8
8
 
9
9
  iris = load_iris
10
10
  y_arr = iris.map {|r| r[0]}
11
+ y = GSL::Vector.alloc y_arr
11
12
  x = iris.map {|r| r[1...-1]}
12
13
  x = x.map{|r| r << 1}
13
- x,y = GLM::Util.formatArrays(x, y_arr)
14
+ x = GLM::Util.aa_to_gsl_matrix x
14
15
 
15
- linear = GLM::Linear.new(x, y)
16
+ linear = GLM::Linear.new(x, y,0.05)
16
17
 
17
18
  (1..T).each do |i|
18
19
  linear.sto_update
19
20
  est_y = linear.est(x)
20
- err = (y.zip(est_y).map{|e| (e[0]-e[1])**2}).reduce(:+)/y.row_size
21
+ err = (est_y - y).nrm2
21
22
  puts err
22
23
  end
23
24
 
@@ -8,19 +8,17 @@ T = 200
8
8
 
9
9
  iris = load_iris
10
10
  y_arr = iris.map {|r| r[-1].to_i}
11
+ y = GSL::Vector.alloc y_arr
11
12
  x = iris.map {|r| r[0...-2]}
12
13
  x = x.map{|r| r << 1}
13
- x,y = GLM::Util.formatArrays(x, y_arr)
14
+ x = GLM::Util.aa_to_gsl_matrix x
14
15
 
15
16
  glm = GLM::Logit.new(x, y, 0.1)
16
17
 
17
18
  (1..T).each do |i|
18
19
  glm.sto_update
19
-
20
20
  est_y = glm.est(x)
21
- err = (y.zip(est_y).map{|e| (e[0]!=e[1])?1:0}).reduce(:+)#/y.row_size
22
- # pp y.zip(est_y)
23
-
21
+ err = (est_y - y).nrm2
24
22
  puts err
25
23
  pp glm.theta
26
24
  end
data/lib/glm.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require 'matrix'
2
+ require 'gsl'
2
3
  include Math
3
4
 
4
5
 
@@ -1,10 +1,10 @@
1
1
  class GLM::Base
2
-
3
- def initialize(x,y,alpha = 0.1)
2
+ @@initial_weight = 1
3
+ def initialize(x,y,alpha = 0.05)
4
4
  @x = x
5
5
  @y = y
6
6
  @@alpha = alpha
7
- @theta = Array.new(x.column_size,1)
7
+ @theta = GSL::Vector.alloc(Array.new(x.size2, @@initial_weight))
8
8
  end
9
9
 
10
10
  #Log partition function <b>a(eta)</b>, intended to be overridden
@@ -18,22 +18,16 @@ class GLM::Base
18
18
  end
19
19
 
20
20
  def format(x)
21
- if x.is_a? Array
22
- if x[0].is_a?(Array)
23
- x.map {|e|
24
- output(e)}
25
- else
26
- output(x)
27
- end
28
- #Assuming x.is_a?(Matrix) == true
29
- else
30
- x.row_vectors.map {|e|
31
- output(Matrix.row_vector(e))
32
- }
21
+ if x.is_a? GSL::Vector
22
+ return output(x)
23
+ elsif x.is_a? GSL::Matrix
24
+ tmp = GSL::Vector.alloc x.size1
25
+ (0...x.size1).each {|i|
26
+ tmp[i]= output(x.row(i))}
27
+ return tmp
33
28
  end
34
29
  end
35
30
 
36
-
37
31
  # Estimator
38
32
  # =Arguments:
39
33
  # x: a feature vector in Array
@@ -46,12 +40,12 @@ class GLM::Base
46
40
  #Output estimation from E(y|theta,x)
47
41
  #Need overriding, except for plain linear regression
48
42
  def output(x)
49
- return h(x.t)
43
+ return h(x)
50
44
  end
51
45
 
52
46
  #Natural parameter eta
53
47
  def eta(x)
54
- tmp = (Matrix.column_vector(@theta).t * x)[0,0]
48
+ tmp = @theta * x.transpose
55
49
  return tmp
56
50
  end
57
51
 
@@ -69,7 +63,8 @@ class GLM::Base
69
63
  #Gradient on one sample
70
64
  def gradient(x,y,v)
71
65
  tmp = h(v)
72
- return (y - tmp) * x
66
+ res = (y - tmp) * x
67
+ return res
73
68
  end
74
69
 
75
70
  # Hypothesis function, outputs E(y|theta, x), mean of y given x parameterized by theta
@@ -89,11 +84,13 @@ class GLM::Base
89
84
 
90
85
  #One complete loop of stochastic gradient descent
91
86
  def sto_update()
92
- (0...(@x.row_size)).each do |i|
93
- (0...(@x.column_size)).each do |j|
94
- @theta[j] += @@alpha * gradient(@x[i,j], @y[i,0], Matrix.column_vector(@x.row(i)))
87
+ (0...(@x.size1)).each do |i|
88
+ (0...(@x.size2)).each do |j|
89
+ updates = gradient(@x[i,j], @y[i], @x.row(i))
90
+ @theta[j] = @theta[j] + @@alpha * updates
95
91
  end
96
92
  end
93
+ pp @theta
97
94
  end
98
95
 
99
96
  def theta()
@@ -13,15 +13,16 @@ class GLM::Linear < GLM::Base
13
13
  def self.g(eta)
14
14
  return eta
15
15
  end
16
-
16
+
17
+ #Normal equation estimation
17
18
  def ne_est(x)
18
19
  @theta = ne_fit
19
- [@theta, x * Matrix.column_vector(@theta)]
20
+ [@theta, x * @theta]
20
21
  end
21
22
 
22
23
  #Normal equation fit
23
24
  def ne_fit
24
- (( @x.t * @x ).inverse * @x.t * @y).to_a.flatten
25
+ ( @x.transpose * @x ).invert * @x.transpose * @y
25
26
  end
26
27
 
27
28
  end
@@ -17,7 +17,7 @@ class GLM::Logit < GLM::Base
17
17
  end
18
18
 
19
19
  def output(x)
20
- return (h(x.t) > 0.5)?1:0
20
+ return (h(x) > 0.5)?1:0
21
21
  end
22
22
 
23
23
  def phi
@@ -6,4 +6,16 @@ class GLM::Util
6
6
  y = Matrix.column_vector(y)
7
7
  return [x, y]
8
8
  end
9
+
10
+ #Construct a GSL::Matrix out of an Array of Arrays; this should have been in GSL::Matrix already
11
+ def self.aa_to_gsl_matrix(aa)
12
+ n_rows = aa.length
13
+ n_cols = aa[0].length
14
+ gmat = GSL::Matrix.alloc n_rows,n_cols
15
+ (0...n_rows).each {|i|
16
+ (0...n_cols).each {|j|
17
+ gmat.set i, j, aa[i][j]}}
18
+ return gmat
19
+ end
20
+
9
21
  end
@@ -1,7 +1,9 @@
1
1
  require 'test/unit'
2
2
  require 'glm'
3
3
  require 'pp'
4
- require 'ruby-debug' ; Debugger.start(:post_mortem => true)
4
+ require 'ruby-debug'
5
+ Debugger.start(:post_mortem => true)
6
+ Debugger.settings[:autoeval] = true
5
7
 
6
8
  class GLMTest < Test::Unit::TestCase
7
9
  def test_glm_logit_truth
@@ -30,10 +32,12 @@ class GLMTest < Test::Unit::TestCase
30
32
  y = iris.map {|r| r[0]}
31
33
  x = iris.map {|r| r[1...-1]}
32
34
  x = x.map{|r| r << 1}
33
- x,y = GLM::Util.formatArrays(x, y)
35
+ #x,y = GLM::Util.formatArrays(x, y)
36
+ x = GLM::Util.aa_to_gsl_matrix(x)
37
+ y = GSL::Vector.alloc(y)
34
38
  linear = GLM::Linear.new(x, y)
35
39
  theta, y_est = linear.ne_est(x)
36
- assert ((y_est - y).map {|e| e ** 2}).reduce(:+)/y.row_size < 0.1
40
+ assert ((y_est - y).map {|e| e ** 2}).norm < 0.1
37
41
  end
38
42
 
39
43
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: glm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -48,8 +48,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
48
48
  version: '0'
49
49
  requirements: []
50
50
  rubyforge_project:
51
- rubygems_version: 1.8.10
51
+ rubygems_version: 1.8.19
52
52
  signing_key:
53
53
  specification_version: 3
54
54
  summary: This is a W.I.P implementation of GLM
55
55
  test_files: []
56
+ has_rdoc: