pa_learner 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile ADDED
@@ -0,0 +1,10 @@
1
# Development-time dependency manifest for the pa_learner gem.

# Adds the current working directory to Ruby's load path.
# NOTE(review): presumably needed so that the gemspec can be required when
# Bundler is run from the project root -- confirm before removing.
$LOAD_PATH << "."

# Fetch gems over TLS; the plain-http endpoint allows tampering in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in pa_learner.gemspec
gemspec

# Tooling used only while developing the gem.
gem 'growl'
gem 'guard-minitest'
gem 'rb-fsevent'
@@ -0,0 +1,45 @@
1
+ #The what?
2
+ Online learning algorithm: Lightweight, that-works
3
+
4
+ Implementation of a "Passive-Aggressive" online learning algorithm. The Passive-Aggressive online learning framework defines a family of online, margin-based linear learners. For further details see: 'Online Passive-Aggressive Algorithms' by Crammer et al., JMLR, 2006.
5
+ - - -
6
+ #The how?
7
+
8
+ ##Usage
9
+ >x is a vector (Array) of dim elements
10
+ >y is its corresponding value (label -or- functional value)
11
+ ###Init
12
+ >pal = PaLearner::DistRegressor.new( dim )
13
+ ###Update
14
+ >pal.update!(x, y)
15
+ ###Estimate
16
+ >pal.estimate( x )
17
+
18
+ ##Examples
19
+ >d = [0.25, 0.75]
20
+ >@data = Array.new(100)
21
+ >(0...@data.size).each do |i|
22
+ > x_1 = rand
23
+ > x_2 = 1 - x_1
24
+ > @data[i] = {:y=>d[0]*x_1+d[1]*x_2 , :x=>[x_1, x_2]}
25
+ >end
26
+ >puts "data--size = #{@data.size}"
27
+ >
28
+ >pal = PaLearner::DistRegressor.new( 2 )
29
+ >
30
+ >@data.shuffle.inject(pal) { |pal, yx| pal.update!(yx[:x], yx[:y]); pal; }
31
+ >@data.shuffle.inject(pal) { |pal, yx| pal.update!(yx[:x], yx[:y]); pal; }
32
+ >
33
+ >puts "pal.estimate([1,0]) = #{pal.estimate( [1, 0] )}"
34
+ >puts "pal.estimate([0,1]) = #{pal.estimate( [0, 1] )}"
35
+
36
+ #License
37
+ ##The MIT License
38
+ ``
39
+ Copyright (c) 2010 ronbee.github@gmail.com
40
+
41
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
42
+
43
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
44
+
45
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.''
@@ -0,0 +1,2 @@
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
@@ -0,0 +1,97 @@
1
# Online Passive-Aggressive (PA) learners.
#
# The module doubles as a namespace for the concrete learners (PA,
# DistRegressor) and as a mixin that supplies the behaviour they share: a
# weight vector +w+, linear estimation, and a few private vector helpers.
module PaLearner
  # Added to denominators so that an all-zero input vector cannot cause a
  # division by zero.
  EPS_TO_AVOID_ZERO_DIV = 0.000000001

  # Current weight vector (Array of Numeric, one entry per dimension).
  attr_reader :w

  # Linear estimate for +x+: the inner product of +x+ and the weights.
  #
  # @param x [Array<Numeric>] input vector
  # @return [Numeric]
  def estimate(x)
    inner_product(x, @w)
  end

  # Binary classification of +x+.
  #
  # @param x [Array<Numeric>] input vector
  # @return [Integer] 1 when the estimate is >= 0, otherwise -1
  def bin_classify(x)
    sign(estimate(x))
  end

  # Binary classifier trained with hinge loss. Supports the three step-size
  # rules named :pa, :pa_I and :pa_II.
  class PA
    include PaLearner

    # @param n [Integer] number of dimensions
    # @param type [Symbol] step-size rule: :pa, :pa_I or :pa_II
    # @param cost [Numeric] aggressiveness parameter C (used by :pa_I and :pa_II)
    def initialize(n, type = :pa, cost = 0)
      pa_kickstart(n)
      @type = type
      @cost = cost
    end

    # Performs one online update, modifying the weight array in place.
    #
    # @param x [Array<Numeric>] input vector
    # @param y [Integer] label, expected to be +1 or -1
    # @return [PA] self
    # @raise [ArgumentError] if the learner was built with an unknown type
    def update!(x, y)
      step = y * calc_update_delta(hinge_loss(x, y), x)
      @w.each_index { |i| @w[i] += step * x[i] }
      self
    end

    private

    # Step size for loss +l+ on input +x+ under the configured rule.
    def calc_update_delta(l, x)
      sq_norm = inner_product(x, x)
      case @type
      when :pa
        l / (sq_norm + EPS_TO_AVOID_ZERO_DIV)
      when :pa_I
        [@cost, l / (sq_norm + EPS_TO_AVOID_ZERO_DIV)].min
      when :pa_II
        # tau = l / (|x|^2 + 1/(2C)). The 1.0 forces float arithmetic: with
        # an integer cost the term would otherwise truncate to 0. With C == 0
        # the term is Infinity, so the step is 0 (no update), matching :pa_I.
        l / (sq_norm + 1.0 / (2 * @cost))
      else
        raise ArgumentError, "unknown PA type: #{@type.inspect}"
      end
    end
  end

  # Regressor whose weight vector is renormalised after every update so that
  # its entries are non-negative and sum to one.
  class DistRegressor
    include PaLearner

    # @param n [Integer] number of dimensions
    # @param eps [Numeric] width of the insensitive zone of the loss
    def initialize(n, eps = 0.001)
      pa_kickstart(n)
      @eps = eps
    end

    # Performs one online update and renormalises the weights.
    #
    # The correction uses -log(x_i), so components of +x+ are expected to be
    # strictly positive; a zero component combined with a non-zero loss yields
    # an infinite correction.
    #
    # @param x [Array<Numeric>] input vector
    # @param y [Numeric] target value
    # @return [DistRegressor] self
    def update!(x, y)
      y_e = estimate(x)
      rate = sign(y_e - y) * (loss(y_e, y) / (inner_product(x, x) + EPS_TO_AVOID_ZERO_DIV))
      # A zero rate is a passive step. It is short-circuited because
      # -log(0) * 0 would be NaN and poison every weight.
      rate_x = x.map { |v| rate.zero? ? 0.0 : -Math.log(v) * rate }
      @w = @w.zip(rate_x).map { |w_i, d_i| w_i + d_i }
      project_to_simplex!(@w)
      self
    end

    #------
    private

    # Epsilon-insensitive loss: max(0, |y_e - y| - eps).
    def loss(y_e, y)
      [0, (y_e - y).abs - @eps].max
    end

    # Renormalises +w+ in place and returns it. Repeatedly shifts the still
    # active components so they sum to 1, clamps negatives to 0 and drops
    # zeroed components from the active set, until the set stops shrinking.
    def project_to_simplex!(w)
      working_ind = (0...w.size).to_a
      last_size = working_ind.size + 1
      while working_ind.size < last_size && working_ind.size > 0
        last_size = working_ind.size
        update = (working_ind.inject(0) { |s, i| s + w[i] } - 1) / working_ind.size.to_f
        working_ind.reject! do |i|
          w[i] -= update
          w[i] = 0 if w[i] < 0
          w[i] == 0
        end
      end
      w
    end
  end

  #------
  private

  # Initialises the weight vector with +n+ zeros.
  def pa_kickstart(n)
    @w = Array.new(n, 0)
  end

  # Inner (dot) product of two equally sized vectors.
  def inner_product(a, b)
    a.zip(b).inject(0) { |sum, (p, q)| sum + p * q }
  end

  # Hinge loss of the current weights on (x, y): max(0, 1 - y * (w . x)).
  def hinge_loss(x, y)
    [0, 1 - inner_product(@w, x) * y].max
  end

  # Sign of +v+, with zero counted as positive.
  def sign(v)
    v >= 0 ? 1 : -1
  end
end
97
+
@@ -0,0 +1,3 @@
1
module PaLearner
  # Gem version string; frozen so the constant cannot be mutated at runtime.
  VERSION = "0.1.1".freeze
end
@@ -0,0 +1,21 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for pa_learner.

# Put lib/ on the load path so the version file can be required below.
$:.push File.expand_path("../lib", __FILE__)
require "pa_learner/version"

Gem::Specification.new do |s|
  s.name        = "pa_learner"
  s.version     = PaLearner::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["ronbee"]
  s.email       = ["ronbee.github@gmail.com"]
  s.homepage    = "https://github.com/ronbee"
  # The README distributes the code under the MIT License.
  s.licenses    = ["MIT"]
  s.summary     = %q{Implementing Online Passive-Aggressive Algorithms}
  s.description = %q{Passive-Aggressive algorithms are a family of online margin based linear learners. For further details see: 'Online Passive-Aggressive Algorithms' by Crammer et al. JMLR, 2006.}

  s.rubyforge_project = "pa_learner"

  # The packaged file lists are taken from what git tracks.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]
end
@@ -0,0 +1,49 @@
1
+ require 'minitest/unit'
2
+
3
+ require File.dirname(__FILE__) + '/../lib/pa_learner'
4
+
5
# Sanity tests for the PaLearner learners on synthetic two-dimensional data.
class MatchTest < MiniTest::Unit::TestCase
  # Number of synthetic samples generated for each test.
  SAMPLE_COUNT = 100

  # Builds SAMPLE_COUNT random samples {:x => [x_1, 1 - x_1], :y => d . x}.
  def setup
    d = [0.25, 0.75] # ground truth -- we will use it for testing both distribution tracking, and classification
    # The block form yields exactly SAMPLE_COUNT entries; iterating the
    # inclusive range 0..size appended an unintended extra sample.
    @data = Array.new(SAMPLE_COUNT) do
      x_1 = rand
      x_2 = 1 - x_1
      { :y => d[0] * x_1 + d[1] * x_2, :x => [x_1, x_2] }
    end
  end

  def test_sanity_distribution_tracking
    pal = PaLearner::DistRegressor.new( 2 )

    # Two passes over the data, each in a fresh random order.
    2.times do
      @data.shuffle.each { |yx| pal.update!(yx[:x], yx[:y]) }
    end

    puts "pal.estimate([1,0]) = #{pal.estimate( [1, 0] )}"
    puts "pal.estimate([0,1]) = #{pal.estimate( [0, 1] )}"

    assert( ( (pal.estimate( [1, 0] )-0.25).abs - 0.01) < 0, "epsilon error larger than 0.01" )
  end

  def test_sanity_classify_pa_0
    pal = train_classifier( PaLearner::PA.new( 2 ) )
    err_rate = error_rate( pal )
    assert( err_rate < 0.0001, "PA-0 error rate is too high: #{err_rate}" )
  end

  def test_sanity_classify_pa_I
    pal = train_classifier( PaLearner::PA.new( 2, :pa_I, 10 ) )
    err_rate = error_rate( pal )
    assert( err_rate < 0.0001, "PA-I error rate is too high: #{err_rate}" )
  end

  def test_sanity_classify_pa_II
    pal = train_classifier( PaLearner::PA.new( 2, :pa_II, 10 ) )
    err_rate = error_rate( pal )
    assert( err_rate < 0.0001, "PA-II error rate is too high: #{err_rate}" )
  end

  private

  # Binary label derived from a sample's target value.
  def label_of(yx)
    yx[:y] >= 0 ? 1 : -1
  end

  # Feeds every sample to +pal+ once, in random order, and returns +pal+.
  def train_classifier(pal)
    @data.shuffle.each { |yx| pal.update!(yx[:x], label_of(yx)) }
    pal
  end

  # Fraction of samples whose predicted label differs from the derived label.
  def error_rate(pal)
    errs = @data.inject(0) { |n, yx| pal.bin_classify(yx[:x]) == label_of(yx) ? n : n + 1 }
    errs / @data.size.to_f
  end
end
metadata ADDED
@@ -0,0 +1,61 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pa_learner
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.1.1
6
+ platform: ruby
7
+ authors:
8
+ - ronbee
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2011-08-13 00:00:00 Z
14
+ dependencies: []
15
+
16
+ description: "Passive-Aggressive algorithms are a family of online margin based linear learners. For further details see: 'Online Passive-Aggressive Algorithms' by Crammer et al. JMLR, 2006."
17
+ email:
18
+ - ronbee.github@gmail.com
19
+ executables: []
20
+
21
+ extensions: []
22
+
23
+ extra_rdoc_files: []
24
+
25
+ files:
26
+ - Gemfile
27
+ - README.md
28
+ - Rakefile
29
+ - lib/pa_learner.rb
30
+ - lib/pa_learner/version.rb
31
+ - pa_learner.gemspec
32
+ - test/pa_learner_test.rb
33
+ homepage: https://github.com/ronbee
34
+ licenses: []
35
+
36
+ post_install_message:
37
+ rdoc_options: []
38
+
39
+ require_paths:
40
+ - lib
41
+ required_ruby_version: !ruby/object:Gem::Requirement
42
+ none: false
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: "0"
47
+ required_rubygems_version: !ruby/object:Gem::Requirement
48
+ none: false
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: "0"
53
+ requirements: []
54
+
55
+ rubyforge_project: pa_learner
56
+ rubygems_version: 1.8.8
57
+ signing_key:
58
+ specification_version: 3
59
+ summary: Implementing Online Passive-Aggressive Algorithms
60
+ test_files:
61
+ - test/pa_learner_test.rb