glm 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,151 @@
1
+ 5.1,3.5,1.4,0.2,Iris-setosa
2
+ 4.9,3.0,1.4,0.2,Iris-setosa
3
+ 4.7,3.2,1.3,0.2,Iris-setosa
4
+ 4.6,3.1,1.5,0.2,Iris-setosa
5
+ 5.0,3.6,1.4,0.2,Iris-setosa
6
+ 5.4,3.9,1.7,0.4,Iris-setosa
7
+ 4.6,3.4,1.4,0.3,Iris-setosa
8
+ 5.0,3.4,1.5,0.2,Iris-setosa
9
+ 4.4,2.9,1.4,0.2,Iris-setosa
10
+ 4.9,3.1,1.5,0.1,Iris-setosa
11
+ 5.4,3.7,1.5,0.2,Iris-setosa
12
+ 4.8,3.4,1.6,0.2,Iris-setosa
13
+ 4.8,3.0,1.4,0.1,Iris-setosa
14
+ 4.3,3.0,1.1,0.1,Iris-setosa
15
+ 5.8,4.0,1.2,0.2,Iris-setosa
16
+ 5.7,4.4,1.5,0.4,Iris-setosa
17
+ 5.4,3.9,1.3,0.4,Iris-setosa
18
+ 5.1,3.5,1.4,0.3,Iris-setosa
19
+ 5.7,3.8,1.7,0.3,Iris-setosa
20
+ 5.1,3.8,1.5,0.3,Iris-setosa
21
+ 5.4,3.4,1.7,0.2,Iris-setosa
22
+ 5.1,3.7,1.5,0.4,Iris-setosa
23
+ 4.6,3.6,1.0,0.2,Iris-setosa
24
+ 5.1,3.3,1.7,0.5,Iris-setosa
25
+ 4.8,3.4,1.9,0.2,Iris-setosa
26
+ 5.0,3.0,1.6,0.2,Iris-setosa
27
+ 5.0,3.4,1.6,0.4,Iris-setosa
28
+ 5.2,3.5,1.5,0.2,Iris-setosa
29
+ 5.2,3.4,1.4,0.2,Iris-setosa
30
+ 4.7,3.2,1.6,0.2,Iris-setosa
31
+ 4.8,3.1,1.6,0.2,Iris-setosa
32
+ 5.4,3.4,1.5,0.4,Iris-setosa
33
+ 5.2,4.1,1.5,0.1,Iris-setosa
34
+ 5.5,4.2,1.4,0.2,Iris-setosa
35
+ 4.9,3.1,1.5,0.1,Iris-setosa
36
+ 5.0,3.2,1.2,0.2,Iris-setosa
37
+ 5.5,3.5,1.3,0.2,Iris-setosa
38
+ 4.9,3.1,1.5,0.1,Iris-setosa
39
+ 4.4,3.0,1.3,0.2,Iris-setosa
40
+ 5.1,3.4,1.5,0.2,Iris-setosa
41
+ 5.0,3.5,1.3,0.3,Iris-setosa
42
+ 4.5,2.3,1.3,0.3,Iris-setosa
43
+ 4.4,3.2,1.3,0.2,Iris-setosa
44
+ 5.0,3.5,1.6,0.6,Iris-setosa
45
+ 5.1,3.8,1.9,0.4,Iris-setosa
46
+ 4.8,3.0,1.4,0.3,Iris-setosa
47
+ 5.1,3.8,1.6,0.2,Iris-setosa
48
+ 4.6,3.2,1.4,0.2,Iris-setosa
49
+ 5.3,3.7,1.5,0.2,Iris-setosa
50
+ 5.0,3.3,1.4,0.2,Iris-setosa
51
+ 7.0,3.2,4.7,1.4,Iris-versicolor
52
+ 6.4,3.2,4.5,1.5,Iris-versicolor
53
+ 6.9,3.1,4.9,1.5,Iris-versicolor
54
+ 5.5,2.3,4.0,1.3,Iris-versicolor
55
+ 6.5,2.8,4.6,1.5,Iris-versicolor
56
+ 5.7,2.8,4.5,1.3,Iris-versicolor
57
+ 6.3,3.3,4.7,1.6,Iris-versicolor
58
+ 4.9,2.4,3.3,1.0,Iris-versicolor
59
+ 6.6,2.9,4.6,1.3,Iris-versicolor
60
+ 5.2,2.7,3.9,1.4,Iris-versicolor
61
+ 5.0,2.0,3.5,1.0,Iris-versicolor
62
+ 5.9,3.0,4.2,1.5,Iris-versicolor
63
+ 6.0,2.2,4.0,1.0,Iris-versicolor
64
+ 6.1,2.9,4.7,1.4,Iris-versicolor
65
+ 5.6,2.9,3.6,1.3,Iris-versicolor
66
+ 6.7,3.1,4.4,1.4,Iris-versicolor
67
+ 5.6,3.0,4.5,1.5,Iris-versicolor
68
+ 5.8,2.7,4.1,1.0,Iris-versicolor
69
+ 6.2,2.2,4.5,1.5,Iris-versicolor
70
+ 5.6,2.5,3.9,1.1,Iris-versicolor
71
+ 5.9,3.2,4.8,1.8,Iris-versicolor
72
+ 6.1,2.8,4.0,1.3,Iris-versicolor
73
+ 6.3,2.5,4.9,1.5,Iris-versicolor
74
+ 6.1,2.8,4.7,1.2,Iris-versicolor
75
+ 6.4,2.9,4.3,1.3,Iris-versicolor
76
+ 6.6,3.0,4.4,1.4,Iris-versicolor
77
+ 6.8,2.8,4.8,1.4,Iris-versicolor
78
+ 6.7,3.0,5.0,1.7,Iris-versicolor
79
+ 6.0,2.9,4.5,1.5,Iris-versicolor
80
+ 5.7,2.6,3.5,1.0,Iris-versicolor
81
+ 5.5,2.4,3.8,1.1,Iris-versicolor
82
+ 5.5,2.4,3.7,1.0,Iris-versicolor
83
+ 5.8,2.7,3.9,1.2,Iris-versicolor
84
+ 6.0,2.7,5.1,1.6,Iris-versicolor
85
+ 5.4,3.0,4.5,1.5,Iris-versicolor
86
+ 6.0,3.4,4.5,1.6,Iris-versicolor
87
+ 6.7,3.1,4.7,1.5,Iris-versicolor
88
+ 6.3,2.3,4.4,1.3,Iris-versicolor
89
+ 5.6,3.0,4.1,1.3,Iris-versicolor
90
+ 5.5,2.5,4.0,1.3,Iris-versicolor
91
+ 5.5,2.6,4.4,1.2,Iris-versicolor
92
+ 6.1,3.0,4.6,1.4,Iris-versicolor
93
+ 5.8,2.6,4.0,1.2,Iris-versicolor
94
+ 5.0,2.3,3.3,1.0,Iris-versicolor
95
+ 5.6,2.7,4.2,1.3,Iris-versicolor
96
+ 5.7,3.0,4.2,1.2,Iris-versicolor
97
+ 5.7,2.9,4.2,1.3,Iris-versicolor
98
+ 6.2,2.9,4.3,1.3,Iris-versicolor
99
+ 5.1,2.5,3.0,1.1,Iris-versicolor
100
+ 5.7,2.8,4.1,1.3,Iris-versicolor
101
+ 6.3,3.3,6.0,2.5,Iris-virginica
102
+ 5.8,2.7,5.1,1.9,Iris-virginica
103
+ 7.1,3.0,5.9,2.1,Iris-virginica
104
+ 6.3,2.9,5.6,1.8,Iris-virginica
105
+ 6.5,3.0,5.8,2.2,Iris-virginica
106
+ 7.6,3.0,6.6,2.1,Iris-virginica
107
+ 4.9,2.5,4.5,1.7,Iris-virginica
108
+ 7.3,2.9,6.3,1.8,Iris-virginica
109
+ 6.7,2.5,5.8,1.8,Iris-virginica
110
+ 7.2,3.6,6.1,2.5,Iris-virginica
111
+ 6.5,3.2,5.1,2.0,Iris-virginica
112
+ 6.4,2.7,5.3,1.9,Iris-virginica
113
+ 6.8,3.0,5.5,2.1,Iris-virginica
114
+ 5.7,2.5,5.0,2.0,Iris-virginica
115
+ 5.8,2.8,5.1,2.4,Iris-virginica
116
+ 6.4,3.2,5.3,2.3,Iris-virginica
117
+ 6.5,3.0,5.5,1.8,Iris-virginica
118
+ 7.7,3.8,6.7,2.2,Iris-virginica
119
+ 7.7,2.6,6.9,2.3,Iris-virginica
120
+ 6.0,2.2,5.0,1.5,Iris-virginica
121
+ 6.9,3.2,5.7,2.3,Iris-virginica
122
+ 5.6,2.8,4.9,2.0,Iris-virginica
123
+ 7.7,2.8,6.7,2.0,Iris-virginica
124
+ 6.3,2.7,4.9,1.8,Iris-virginica
125
+ 6.7,3.3,5.7,2.1,Iris-virginica
126
+ 7.2,3.2,6.0,1.8,Iris-virginica
127
+ 6.2,2.8,4.8,1.8,Iris-virginica
128
+ 6.1,3.0,4.9,1.8,Iris-virginica
129
+ 6.4,2.8,5.6,2.1,Iris-virginica
130
+ 7.2,3.0,5.8,1.6,Iris-virginica
131
+ 7.4,2.8,6.1,1.9,Iris-virginica
132
+ 7.9,3.8,6.4,2.0,Iris-virginica
133
+ 6.4,2.8,5.6,2.2,Iris-virginica
134
+ 6.3,2.8,5.1,1.5,Iris-virginica
135
+ 6.1,2.6,5.6,1.4,Iris-virginica
136
+ 7.7,3.0,6.1,2.3,Iris-virginica
137
+ 6.3,3.4,5.6,2.4,Iris-virginica
138
+ 6.4,3.1,5.5,1.8,Iris-virginica
139
+ 6.0,3.0,4.8,1.8,Iris-virginica
140
+ 6.9,3.1,5.4,2.1,Iris-virginica
141
+ 6.7,3.1,5.6,2.4,Iris-virginica
142
+ 6.9,3.1,5.1,2.3,Iris-virginica
143
+ 5.8,2.7,5.1,1.9,Iris-virginica
144
+ 6.8,3.2,5.9,2.3,Iris-virginica
145
+ 6.7,3.3,5.7,2.5,Iris-virginica
146
+ 6.7,3.0,5.2,2.3,Iris-virginica
147
+ 6.3,2.5,5.0,1.9,Iris-virginica
148
+ 6.5,3.0,5.2,2.0,Iris-virginica
149
+ 6.2,3.4,5.4,2.3,Iris-virginica
150
+ 5.9,3.0,5.1,1.8,Iris-virginica
151
+
@@ -0,0 +1,14 @@
1
# Loads the iris data set as a 2-d array: one row per sample, every column but
# the last holding a float-valued feature and the last column holding the
# binary target (1.0 for Iris-setosa, 0.0 for any other kept species).
# Iris-virginica samples and blank lines are dropped.
#
# =Arguments:
#  path: location of the comma-separated data file; defaults to
#        data/iris.data relative to the current working directory
# =Returns:
#  Array of rows, each an Array of Floats
def load_iris(path = File.join("data", "iris.data"))
  # File.readlines opens and closes the file itself, so the handle cannot leak
  # when parsing raises part-way through.
  rows = File.readlines(path).map { |line| line.chomp.split(",") }
  kept = rows.reject { |fields| fields.empty? || fields[4] == "Iris-virginica" }
  kept.map do |fields|
    # Replace the species name by the numeric target before the float conversion.
    fields[4] = fields[4] == "Iris-setosa" ? 1 : 0
    fields.map(&:to_f)
  end
end
@@ -0,0 +1,28 @@
1
+ require 'glm'
2
+ require 'pp'
3
+ require 'ruby-debug' ; Debugger.start(:post_mortem => true)
4
+ Debugger.settings[:autoeval] = true
5
+ require 'prepare'
6
+
7
# Number of full stochastic-gradient passes over the data set.
T = 2000

iris = load_iris
# Column 0 of every row is used as the regression target.
y_arr = iris.map { |row| row[0] }
# Features: the columns between the target and the trailing label column,
# followed by a constant 1.
x = iris.map { |row| row[1...-1] << 1 }
x, y = GLM::Util.formatArrays(x, y_arr)

linear = GLM::Linear.new(x, y)

T.times do
  linear.sto_update
  est_y = linear.est(x)
  # Mean squared error between the targets and the current estimates.
  squared_errors = y.zip(est_y).map { |actual, predicted| (actual - predicted)**2 }
  err = squared_errors.reduce(:+) / y.row_size
  puts err
end
23
+
24
+ #end
25
+
26
+ #puts ((y_est - y).map {|e| e ** 2}).reduce(:+)/y.row_size
27
+ # puts "Theta: #{y_est.to_a.to_s}"
28
+
@@ -0,0 +1,26 @@
1
+ require 'glm'
2
+ require 'pp'
3
+ require 'ruby-debug' ; Debugger.start(:post_mortem => true)
4
+ Debugger.settings[:autoeval] = true
5
+ require 'prepare'
6
+
7
# Number of full stochastic-gradient passes over the data set.
T = 200

iris = load_iris
# The last column of every row is the class label, taken as an integer.
y_arr = iris.map { |row| row[-1].to_i }
# Features: every column before the last two, followed by a constant 1.
# NOTE(review): `0...-2` also leaves out the column just before the label --
# confirm that dropping it is intended.
x = iris.map { |row| row[0...-2] << 1 }
x, y = GLM::Util.formatArrays(x, y_arr)

glm = GLM::Logit.new(x, y, 0.1)

T.times do
  glm.sto_update

  est_y = glm.est(x)
  # Number of samples whose estimate differs from the true label.
  mismatches = y.zip(est_y).map { |actual, predicted| actual != predicted ? 1 : 0 }
  err = mismatches.reduce(:+)

  puts err
  pp glm.theta
end
@@ -0,0 +1,20 @@
1
+ require 'matrix'
2
+ include Math
3
+
4
+
5
# The main class of the library: Generalized Linear Models.
# The model classes are defined under this constant (GLM::Base and friends).
class GLM
  class << self
    # Placeholder for a helper meant to vectorize a function.
    # It has no body yet: it takes no arguments and returns nil.
    def vectorize
    end
  end
end
16
+
17
+
18
+ require 'glm/logit'
19
+ require 'glm/linear'
20
+ require 'glm/util'
@@ -0,0 +1,103 @@
1
# Shared base for generalized linear models fitted by stochastic gradient
# descent. Subclasses supply the canonical response function <b>g</b> and may
# override +output+, +a+ and +b+.
class GLM::Base

  # Current parameter vector: a plain Array with one entry per column of x.
  attr_reader :theta

  # =Arguments:
  #  x: sample matrix, one row per sample (read through column_size,
  #     row_size, row(i) and [i,j])
  #  y: targets, read as y[i,0] for sample i
  #  alpha: step size applied to every gradient step
  def initialize(x, y, alpha = 0.1)
    @x = x
    @y = y
    # Kept per instance: a class variable would be shared by every model and
    # subclass, so building a second model would silently change the step
    # size of the first one.
    @alpha = alpha
    @theta = Array.new(x.column_size, 1)
  end

  #Log partition function <b>a(eta)</b>, intended to be overridden
  def a
    raise 'Log partition function a(eta) undefined'
  end

  #Intended to be overridden
  def b
    raise 'b undefined'
  end

  # Runs +output+ on every sample contained in x.
  # (Note that this name shadows Kernel#format inside model instances.)
  # =Arguments:
  #  x: a flat Array (one feature vector), an Array of Arrays (one feature
  #     vector per entry) or a Matrix (one feature vector per row)
  # =Returns:
  #  A single estimation for a flat Array, otherwise an Array of estimations
  def format(x)
    if x.is_a?(Array)
      # +output+ transposes its argument, which a plain Array cannot do, so
      # every feature vector is wrapped in a single-row Matrix first.
      if x[0].is_a?(Array)
        x.map { |row| output(Matrix.row_vector(row)) }
      else
        output(Matrix.row_vector(x))
      end
    else
      #Assuming x.is_a?(Matrix) == true
      x.row_vectors.map { |row| output(Matrix.row_vector(row)) }
    end
  end

  # Estimator
  # =Arguments:
  #  x: a feature vector in Array, an Array of such vectors, or a Matrix
  #     holding one feature vector per row
  # =Returns:
  #  Estimation (an Array of estimations when several vectors are given)
  def est(x)
    format(x)
  end

  #Output estimation from E(y|theta,x)
  #Need overriding, except for plain linear regression
  # =Arguments:
  #  x: a single-row Matrix
  def output(x)
    h(x.t)
  end

  #Natural parameter eta: theta transposed times x
  # =Arguments:
  #  x: a feature vector as a single-column Matrix
  def eta(x)
    (Matrix.column_vector(@theta).t * x)[0, 0]
  end

  #Sufficient statistic <b>T</b>
  def T
    @y
  end

  #Canonical response function, intended to be overridden
  def self.g(eta)
    raise 'Canonical reponse function g(eta) undefined'
  end

  #Gradient on one sample, for one coordinate
  # =Arguments:
  #  x: the value of that coordinate in the sample
  #  y: the target of the sample
  #  v: the whole sample as a single-column Matrix
  def gradient(x, y, v)
    (y - h(v)) * x
  end

  # Hypothesis function, outputs E(y|theta, x), mean of y given x parameterized by theta
  # =Parameters:
  #  x: a feature vector as a single-column Matrix
  # =Returns:
  #  E(y|theta, x)
  def h(x)
    self.class.g(eta(x))
  end

  #A step based on one sample in stochastic gradient descent.
  #Not implemented: it currently does nothing and returns nil.
  def single_update
  end

  #One complete loop of stochastic gradient descent: one step per sample
  def sto_update
    (0...@x.row_size).each do |i|
      sample = Matrix.column_vector(@x.row(i))
      target = @y[i, 0]
      # Every coordinate's step is computed before any coordinate is changed.
      # Updating theta[j] inside this loop would make the later coordinates
      # use a prediction from a half-updated theta.
      steps = (0...@x.column_size).map do |j|
        @alpha * gradient(@x[i, j], target, sample)
      end
      # Applied in place so that an Array obtained earlier through +theta+
      # keeps reflecting the current parameters.
      steps.each_with_index { |step, j| @theta[j] += step }
    end
  end

end
@@ -0,0 +1,27 @@
1
+ require 'glm/base'
2
+
3
# Plain linear regression: the canonical response function is the identity.
# Besides the inherited fitting, it offers a closed-form normal-equation fit.
class GLM::Linear < GLM::Base

  # Returns a short greeting naming the receiver's class.
  def hi
    "Hi, this is #{self.class}"
  end

  class << self
    # Canonical response function
    # It's the identity function here: eta is returned unchanged.
    def g(eta)
      eta
    end
  end

  # Fits theta with the normal equation, then estimates x with it.
  # =Arguments:
  #  x: a Matrix with one sample per row
  # =Returns:
  #  [theta, estimations], estimations being x times theta as a column Matrix
  def ne_est(x)
    @theta = ne_fit
    estimations = x * Matrix.column_vector(@theta)
    [@theta, estimations]
  end

  #Normal equation fit: (X' X)^-1 X' y, flattened into a plain Array
  def ne_fit
    x_t = @x.t
    gram = x_t * @x
    solution = gram.inverse * x_t * @y
    solution.to_a.flatten
  end

end
@@ -0,0 +1,62 @@
1
+ require 'glm/base'
2
+
3
# Logistic regression: the canonical response function is the logistic
# (sigmoid) function and estimations are thresholded into 0/1 labels.
class GLM::Logit < GLM::Base

  # Log partition function, computed as -log(1 - phi(x)).
  # It used to take no argument and always raised NameError, because +phi+
  # referred to an undefined x.
  # =Arguments:
  #  x: a feature vector as a single-column Matrix
  def a(x)
    -Math.log(1 - phi(x))
  end

  def b
    1
  end

  # Canonical response function
  def self.g(eta)
    sigmoid(eta)
  end

  # Estimated label of one sample: 1 when the hypothesis exceeds 0.5, else 0.
  # =Arguments:
  #  x: a single-row Matrix
  def output(x)
    h(x.t) > 0.5 ? 1 : 0
  end

  # Value of the hypothesis function for x.
  # The feature vector is now an explicit argument; the former zero-argument
  # form read an undefined local and could only raise NameError.
  # =Arguments:
  #  x: a feature vector as a single-column Matrix
  def phi(x)
    h(x)
  end

  # Logistic function on vectors, parameterized by theta
  # Arguments:
  #   theta: An array
  # Returns a closure which takes
  # Arguments:
  #   x: the feature values, in a form Matrix.row_vector accepts
  def self.sigmoid_vec(theta)
    lambda { |x|
      # The product is a 1x1 Matrix; its trace is the scalar it contains.
      sigmoid((Matrix.row_vector(x) * Matrix.row_vector(theta).t).tr)
    }
  end

  # Logistic function
  # Arguments:
  #   x: scalar
  def self.sigmoid(x)
    # Math.exp is called explicitly so this class does not depend on some
    # other file having done a top-level `include Math`.
    1 / (1 + Math.exp(-x))
  end

  # Derivative of Logistic function
  # Arguments:
  #   x: scalar
  def self.deriv_sigmoid(x)
    sigmoid(x) * (1 - sigmoid(x))
  end

  # Computes log(z / (1 - z)).
  # Arguments:
  #   z: scalar
  def self.logit(z)
    Math.log(z / (1 - z))
  end

  def self.truth
    "Sanity is for the weak!"
  end

end
@@ -0,0 +1,9 @@
1
+ require 'matrix'
2
+
3
# Small conversion helpers.
class GLM::Util
  # Turns plain arrays into the Matrix objects the models work with.
  # =Arguments:
  #  x: Array of rows, each row an Array
  #  y: flat Array
  # =Returns:
  #  [x as a Matrix built from the rows, y as a single-column Matrix]
  def self.formatArrays(x, y)
    [Matrix.rows(x), Matrix.column_vector(y)]
  end
end
@@ -0,0 +1,41 @@
1
+ require 'test/unit'
2
+ require 'glm'
3
+ require 'pp'
4
+ require 'ruby-debug' ; Debugger.start(:post_mortem => true)
5
+
6
# Unit tests for the glm library.
class GLMTest < Test::Unit::TestCase
  def test_glm_logit_truth
    assert_equal "Sanity is for the weak!", GLM::Logit.truth
  end

  #Helper method
  # Reads data/iris.data (relative to the current working directory), drops
  # blank lines and Iris-virginica samples, and returns one Array of Floats
  # per remaining sample; the last entry is 1.0 for Iris-setosa, else 0.0.
  def load_iris
    # File.readlines closes the file by itself, so nothing stays open when
    # the length assertion below fails.
    rows = File.readlines(File.join("data", "iris.data")).map { |line| line.chomp.split(",") }
    kept = rows.reject { |fields| fields.empty? || fields[4] == "Iris-virginica" }
    iris = kept.map do |fields|
      fields[4] = fields[4] == "Iris-setosa" ? 1 : 0
      fields.map(&:to_f)
    end
    assert_equal 100, iris.length
    iris
  end

  # Fits column 0 from the columns between it and the label (plus a constant
  # 1) with the normal equation and checks the mean squared error.
  def test_linear
    iris = load_iris
    y = iris.map { |row| row[0] }
    x = iris.map { |row| row[1...-1] << 1 }
    x, y = GLM::Util.formatArrays(x, y)
    linear = GLM::Linear.new(x, y)
    _theta, y_est = linear.ne_est(x)
    mse = (y_est - y).map { |e| e**2 }.reduce(:+) / y.row_size
    assert mse < 0.1
  end

end
metadata ADDED
@@ -0,0 +1,54 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: glm
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Yu Shen
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-03-19 00:00:00.000000000Z
13
+ dependencies: []
14
+ description: Generalized Linear Method
15
+ email: yushen83@gmail.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - lib/glm.rb
21
+ - lib/glm/base.rb
22
+ - lib/glm/linear.rb
23
+ - lib/glm/logit.rb
24
+ - lib/glm/util.rb
25
+ - examples/prepare.rb
26
+ - examples/test_glm_logit.rb
27
+ - examples/test_glm_linear.rb
28
+ - test/test_glm.rb
29
+ - data/iris.data
30
+ homepage: https://github.com/yushen
31
+ licenses: []
32
+ post_install_message:
33
+ rdoc_options: []
34
+ require_paths:
35
+ - lib
36
+ required_ruby_version: !ruby/object:Gem::Requirement
37
+ none: false
38
+ requirements:
39
+ - - ! '>='
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ required_rubygems_version: !ruby/object:Gem::Requirement
43
+ none: false
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ requirements: []
49
+ rubyforge_project:
50
+ rubygems_version: 1.8.19
51
+ signing_key:
52
+ specification_version: 3
53
+ summary: This is a W.I.P implementation of GLM
54
+ test_files: []