ml 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -5,6 +5,9 @@ Machine learning library for ruby
5
5
  # Algorithm Implemented
6
6
 
7
7
  * Perceptron Learning Algorithm
8
+ * Adaptive Perceptron (Adaline) Learning Algorithm
9
+ * Pocket Learning Algorithm
10
+ * Decision Stump Learning Algorithm
8
11
 
9
12
  # Tools
10
13
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.0
1
+ 0.3.0
@@ -2,68 +2,28 @@ require 'matrix'
2
2
 
3
3
  module ML
4
4
  module Data
5
- # Generating sample points on 2D plane
6
- class Generator2D
7
- # Generate point from line
8
- #
9
- # @param [Array] coef [a,b,c] for ax+by+c=0
10
- # @param [Number] x x value
11
- # @return [Array] point
12
- def self.point_from_line coef, x
13
- [x, (-coef[2]-(coef[0] * x))/coef[1]]
14
- end
15
-
16
- # Initialize a generator
17
- #
18
- # @param [Integer] x_range x range
19
- # @param [Integer] y_range y range
20
- def initialize x_range = 100, y_range = 100
21
- @x_range = x_range
22
- @y_range = y_range
23
- end
24
-
25
- # Generate two groups of points on 2d plain
26
- #
27
- # @param [Integer] points the number of points of each set
28
- # @param [Array] coef [a,b,c] for ax+by+c=0
29
- # @return [Hash] key: points, value: supervised value
30
- def points_2d points, coef = [-1.0, 1.0, 0.0]
31
- result = {}
32
- # for each group
33
- [1, -1].each do |grp|
34
- points.times do
35
- while true
36
- point = generate_point
37
- prod = Matrix.column_vector(point).transpose * Matrix.column_vector(coef)
38
- if (prod[0,0] <=> 0) == grp
39
- result[point] = grp
40
- break
41
- end
42
- end
43
- end
44
- end
45
- result
46
- end
47
-
48
- private
49
- def generate_point
50
- [@x_range * rand, @y_range * rand, 1.0]
51
- end
52
- end
53
-
54
5
  # General generator for n-dimensional space
55
6
  class Generator
56
7
  # Initial generator
57
8
  #
58
9
  # @param [Integer] dim dimension
59
- def initialize dim
10
+ # @param [Numeric] scale the magnitude of the vector
11
+ # @param [Numeric] noise the percentage of noise
12
+ # @param [Symbol] model the noise model: :random flips each label
13
+ # independently with probability noise, while :flip flips a fixed
14
+ # fraction (noise) of all the labels, chosen at random
15
+ def initialize dim, scale = 1, noise = 0, model = :random
60
16
  @dim = dim
17
+ @scale = scale
18
+ @noise = noise
19
+ @model = model
61
20
  end
62
21
 
63
22
  # Generate two groups of points
64
23
  #
65
24
  # @param [Integer] points the number of points of each set
66
- # @param [Array] coef array of the size of dimension to specify the hyper plane
25
+ # @param [Array] coef array of the size of dimension to specify the
26
+ # hyper plane
67
27
  # @return [Hash] key: points, value: supervised value
68
28
  def points points, coef
69
29
  result = {}
@@ -71,27 +31,79 @@ module ML
71
31
  [1, -1].each do |grp|
72
32
  points.times do
73
33
  while true
74
- point = Generator.generate_vector(@dim, 100)
34
+ point = generate_vector
75
35
  prod = Matrix.column_vector(point).transpose * Matrix.column_vector(coef)
76
36
  if (prod[0,0] <=> 0) == grp
77
37
  result[point] = grp
38
+ result[point] *= -1 if @model == :random and rand < @noise
78
39
  break
79
40
  end
80
41
  end
81
42
  end
82
43
  end
44
+
45
+ if @model == :flip and @noise > 0
46
+ flipping = (points * @noise * 2).to_i
47
+ order = (0...(points * 2)).to_a.shuffle
48
+ for i in 0...flipping
49
+ result[result.keys[order[i]]] *= -1
50
+ end
51
+ end
52
+
83
53
  result
84
54
  end
85
55
 
86
56
  # Generating a random vector
87
57
  #
88
58
  # @param [Integer] dim the dimension of the vector
89
- # @param [Integer] scale the scale of each component
59
+ # @param [Numeric] scale bound of each component, drawn from [-scale, scale) (default 1)
90
60
  # @return [Array] random vector
91
61
  def self.generate_vector dim, scale = 1
92
- result = Array.new(dim) { (rand - 0.5) * scale }
62
+ result = Array.new(dim) { (rand - 0.5) * 2 * scale }
93
63
  result << 1.0
94
64
  end
65
+
66
+ protected
67
+ def generate_vector
68
+ Generator.generate_vector @dim, @scale
69
+ end
70
+ end
71
+
72
# Generating sample points on 2D plane
#
# Specialises Generator by drawing points from the rectangle
# [0, x_range) x [0, y_range) instead of a symmetric cube.
class Generator2D < Generator
  # Generate point from line
  #
  # The division uses the numeric types of +coef+ as given, so integer
  # coefficients divide as integers.
  #
  # @param [Array] coef [a,b,c] for ax+by+c=0 (b must be non-zero)
  # @param [Number] x x value
  # @return [Array] point [x, y] on the line
  def self.point_from_line coef, x
    [x, (-coef[2] - (coef[0] * x)) / coef[1]]
  end

  # Initialize a generator
  #
  # @param [Integer] x_range x range
  # @param [Integer] y_range y range
  # @param [Numeric] noise the percentage of noise
  # @param [Symbol] model the noise model (:random or :flip), as
  #   understood by Generator#points
  def initialize x_range = 100, y_range = 100, noise = 0, model = :random
    # Chain to Generator so @dim, @scale, @noise and @model are all set;
    # without @model the noise branches in Generator#points never fire.
    super 2, 1, noise, model
    @x_range = x_range
    @y_range = y_range
  end

  # Generate two groups of points on 2d plane
  #
  # @param [Integer] points the number of points of each set
  # @param [Array] coef [a,b,c] for ax+by+c=0
  # @return [Hash] key: points, value: supervised value
  def points_2d points, coef = [-1.0, 1.0, 0.0]
    # `points` is also the parameter name; the explicit receiver makes the
    # call to the inherited method unambiguous.
    self.points(points, coef)
  end

  protected

  # Draw one augmented point [x, y, 1.0] inside the configured rectangle
  #
  # @return [Array] random point with the constant 1.0 appended
  def generate_vector
    [@x_range * rand, @y_range * rand, 1.0]
  end
end
96
108
  end
97
109
  end
@@ -0,0 +1,108 @@
1
module ML
  module Learner
    # Implementation of decision stump learning
    #
    # A decision stump thresholds a single component of the input vector.
    # A hypothesis is stored as the array [s, i, t] and evaluates to
    #   h_{s,i,t}(x) = s * sign(x_i - t)
    # where sign(v) is +1 for v > 0 and -1 otherwise.
    class DecisionStumpLearner
      # Error vector of each dimension
      #
      # @return [Array] training error of the stump chosen for each dimension
      attr_reader :error_vector

      # Initialize a decision stump learner
      #
      # @param [Integer] dim dimension
      def initialize dim
        @dim = dim
        reset
      end

      # Train with a supervised data
      #
      # Every call starts from scratch, so the result only reflects +data+.
      #
      # @param [Hash] data supervised input data (mapping from array to integer)
      # @return [Hash] {error} number of misclassified training samples of
      #   the best stump found
      # @raise [ArgumentError] if data is empty (and dim > 0)
      def train! data
        reset

        @dim.times do |i|
          hypo, error = search data, i
          update_hypo hypo, error
          @error_vector[i] = error
        end

        {:error => @min_error}
      end

      # Predict certain data
      #
      # @param [Array] data data in question
      # @return [Integer] prediction (+1 or -1 times the stump's sign s)
      # @raise [RuntimeError] if the learner has not been trained yet
      def predict data
        raise "learner has not been trained" if @best_hypo.nil?
        classify data, @best_hypo
      end

      # Get the hypothesis vector
      #
      # Format of hypothesis vector
      # h_{s,i,t}(x) = s sign((x)_i - t)
      #
      # @return [Array] [s, i, t] vector (nil before training)
      def hypothesis
        @best_hypo
      end

      private

      # Forget everything learned so far
      def reset
        @min_error = Float::INFINITY
        @error_vector = []
        @best_hypo = nil
      end

      # Evaluate hypothesis [s, i, t] on one input; a value exactly on the
      # threshold is treated as the negative side
      def classify data, hypo
        side = (data[hypo[1]] - hypo[2] > 0) ? 1 : -1
        hypo[0] * side
      end

      # Keep the hypothesis if it beats the best one seen so far
      def update_hypo hypo, error
        return unless error < @min_error

        @best_hypo = hypo
        @min_error = error
      end

      # Find the best stump along one dimension
      #
      # Sorts the samples by that component and tries a threshold after
      # every sample, in both orientations, picking the split with the
      # fewest misclassified samples (first one wins on ties).
      #
      # @return [Array] [hypothesis, training error of that hypothesis]
      def search data, dim
        pool = data.to_a.sort_by { |point, _| point[dim] }
        raise ArgumentError, "training data must not be empty" if pool.empty?

        positives = pool.count { |_, label| label == 1 }
        negatives = pool.size - positives

        best_error, best_sign, best_index = Float::INFINITY, nil, nil
        diff = 0 # (#label == 1) - (#others) among pool[0..i]

        pool.each_with_index do |(_, label), i|
          diff += (label == 1) ? 1 : -1

          # s = -1 predicts +1 up to the threshold and -1 beyond it;
          # s = +1 predicts the opposite. Errors follow from the counts.
          [[-1, positives - diff], [1, negatives + diff]].each do |sign, error|
            if error < best_error
              best_error, best_sign, best_index = error, sign, i
            end
          end
        end

        thres = if best_index == pool.size - 1
                  # Threshold just above the largest value
                  pool[-1][0][dim] + 0.01
                else
                  (pool[best_index][0][dim] + pool[best_index + 1][0][dim]) / 2.0
                end
        hypo = [best_sign, dim, thres]

        # Report the error actually produced by the hypothesis (this also
        # accounts for samples sharing the same component value).
        [hypo, classify_error(pool, hypo)]
      end

      # Count the samples the hypothesis gets wrong
      def classify_error data, hypo
        data.count { |dat, result| classify(dat, hypo) != result }
      end
    end
  end
end
@@ -7,7 +7,7 @@ module ML
7
7
  # Initialize a perceptron learner
8
8
  #
9
9
  # @param [Integer] dim the number of dimension
10
- def initialize dim, thres = 1.0/0
10
+ def initialize dim
11
11
  @dim = dim
12
12
  @w = Matrix.column_vector(Array.new(dim + 1, 0))
13
13
  end
@@ -16,16 +16,15 @@ module ML
16
16
  #
17
17
  # @param [Hash] data supervised input data (mapping from array to integer)
18
18
  # @param [Numeric] threshold the upper bound of the training iterations
19
- # @return [Array] error_and_update [error, update] error in traning and update numbers used
19
+ # @return [Hash] {error, update_count} training error and the number of updates used
20
20
  def train! data, threshold = 1.0/0
21
21
  pool = data.to_a
22
22
  update = 0
23
- error = 0
24
23
 
25
24
  while true
26
25
  break if update >= threshold
27
26
  misclassified = false
28
- order = (1...(pool.size)).to_a.shuffle
27
+ order = (0...(pool.size)).to_a.shuffle
29
28
 
30
29
  for i in order
31
30
  dat, result = pool[i]
@@ -44,14 +43,13 @@ module ML
44
43
  end
45
44
 
46
45
  # check out errors
47
- if update >= threshold
48
- for dat, result in pool
49
- classified_result = (classify(Matrix.column_vector(dat)) <=> 0)
50
- error += 1 unless result == classified_result
51
- end
52
- end
46
+ error = if update >= threshold
47
+ classify_error pool
48
+ else
49
+ 0
50
+ end
53
51
 
54
- [error, update]
52
+ {:error => error, :update_count => update}
55
53
  end
56
54
 
57
55
  # The final coefficient of the line
@@ -81,6 +79,17 @@ module ML
81
79
  def update_vector x, y
82
80
  @w = @w + y * x
83
81
  end
82
+
83
# Count the supervised samples that the current weights get wrong
#
# A sample is an error when its expected label differs from the sign
# (<=> 0) of the value returned by classify; an output of exactly 0
# therefore never matches a +1/-1 label.
#
# @param [Array, Hash] supervised_data pairs of (input array, expected label)
# @return [Integer] number of misclassified samples
def classify_error supervised_data
  supervised_data.count do |data, result|
    result != (classify(Matrix.column_vector(data)) <=> 0)
  end
end
84
93
  end
85
94
  end
86
95
  end
@@ -0,0 +1,38 @@
1
module ML
  module Learner
    # Implementation of pocket learning algorithm
    #
    # Performs perceptron updates while keeping ("pocketing") the weight
    # vector with the fewest training errors seen so far, and finishes
    # with those pocketed weights installed as the learner's weights.
    class PocketLearner < PerceptronLearner
      # Train with supervised data
      #
      # @param [Hash] data supervised input data (mapping from array to integer)
      # @param [Integer] iteration the maximum number of update rounds
      # @return [Hash] {error, update_count} training error of the pocketed
      #   weights and the number of updates performed
      def train! data, iteration
        pool = data.to_a
        best_error, pocket = Float::INFINITY, @w
        update = 0

        # One extra round so the weights produced by the last update are
        # evaluated too; that final round performs no update.
        (iteration + 1).times do |round|
          # update pocket
          error = classify_error pool
          if error < best_error
            best_error = error
            pocket = @w.dup
          end
          break if best_error == 0 || round == iteration

          # visit every sample (index 0 included) in random order and
          # correct the first misclassified one
          pool.shuffle.each do |dat, result|
            aug_data = Matrix.column_vector(dat)
            next unless wrongly_classify aug_data, result

            update_vector aug_data, result
            update += 1
            break
          end
        end

        # Hand the best weights back to the learner
        @w = pocket
        {:error => best_error, :update_count => update}
      end
    end
  end
end
data/lib/ml.rb CHANGED
@@ -7,6 +7,8 @@ require 'data/parser'
7
7
 
8
8
  require 'method/perceptron'
9
9
  require 'method/adaptive_perceptron'
10
+ require 'method/pocket'
11
+ require 'method/decision_stump'
10
12
 
11
13
  # Top namespace for machine learning algorithms
12
14
  module ML
@@ -19,4 +21,4 @@ module ML
19
21
  end
20
22
  end
21
23
 
22
- MachingLearning = ML
24
+ MachineLearning = ML
data/ml.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "ml"
8
- s.version = "0.2.0"
8
+ s.version = "0.3.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Andrew Liu"]
12
- s.date = "2011-10-17"
12
+ s.date = "2011-11-17"
13
13
  s.description = "Machine learning library in Ruby"
14
14
  s.email = "andrewliu33@gmail.com"
15
15
  s.extra_rdoc_files = [
@@ -27,7 +27,9 @@ Gem::Specification.new do |s|
27
27
  "lib/data/parser.rb",
28
28
  "lib/data/plotter.rb",
29
29
  "lib/method/adaptive_perceptron.rb",
30
+ "lib/method/decision_stump.rb",
30
31
  "lib/method/perceptron.rb",
32
+ "lib/method/pocket.rb",
31
33
  "lib/ml.rb",
32
34
  "ml.gemspec",
33
35
  "spec/data_spec.rb",
@@ -8,7 +8,9 @@ describe "Learner" do
8
8
  generator = ML::Data::Generator2D.new
9
9
  data = generator.points_2d(10)
10
10
 
11
- error, update_count = learner.train! data
11
+ response = learner.train! data
12
+ error = response[:error]
13
+ update_count = response[:update_count]
12
14
 
13
15
  line = learner.line
14
16
  line.should.kind_of?(Array).should == true
@@ -23,7 +25,9 @@ describe "Learner" do
23
25
  generator = ML::Data::Generator.new(4)
24
26
  data = generator.points(10, ML::Data::Generator.generate_vector(4))
25
27
 
26
- error, update_count = learner.train! data
28
+ response = learner.train! data
29
+ error = response[:error]
30
+ update_count = response[:update_count]
27
31
 
28
32
  line = learner.line
29
33
  line.should.kind_of?(Array).should == true
@@ -37,10 +41,12 @@ describe "Learner" do
37
41
  it "should run adaptive perceptron learning in hyperspace" do
38
42
  learner = ML::Learner::AdaptivePerceptronLearner.new(4, 0.1)
39
43
 
40
- generator = ML::Data::Generator.new(4)
44
+ generator = ML::Data::Generator.new(4, 100)
41
45
  data = generator.points(10, ML::Data::Generator.generate_vector(4))
42
46
 
43
- error, update_count = learner.train! data, 1000
47
+ response = learner.train! data, 1000
48
+ error = response[:error]
49
+ update_count = response[:update_count]
44
50
 
45
51
  line = learner.line
46
52
  line.should.kind_of?(Array).should == true
@@ -50,4 +56,56 @@ describe "Learner" do
50
56
  update_count.should < 1000
51
57
  end
52
58
  end
59
+
60
+ describe "Pocket Learner" do
61
+ it "should run pocket perceptron learning in hyperspace" do
62
+ learner = ML::Learner::PocketLearner.new(4)
63
+
64
+ generator = ML::Data::Generator.new(4)
65
+ data = generator.points(10, ML::Data::Generator.generate_vector(4))
66
+
67
+ learner.train! data, 1000
68
+
69
+ line = learner.line
70
+ line.should.kind_of?(Array).should == true
71
+ line.size.should == 5
72
+ end
73
+
74
+ it "should run pocket perceptron learning in noisy data" do
75
+ learner = ML::Learner::PocketLearner.new(4)
76
+
77
+ generator = ML::Data::Generator.new(4, 1, 0.1)
78
+ data = generator.points(10, ML::Data::Generator.generate_vector(4))
79
+
80
+ learner.train! data, 1000
81
+
82
+ line = learner.line
83
+ line.should.kind_of?(Array).should == true
84
+ line.size.should == 5
85
+ end
86
+ end
87
+
88
+ describe "Decision Stump Learner" do
89
+ it "should run decision stump learning in hyperspace" do
90
+ learner = ML::Learner::DecisionStumpLearner.new(4)
91
+
92
+ generator = ML::Data::Generator.new(4)
93
+ data = generator.points(10, ML::Data::Generator.generate_vector(4))
94
+
95
+ learner.train! data
96
+ vector = learner.error_vector
97
+ vector.size.should == 4
98
+ end
99
+
100
+ it "should run decision stump learning in noisy data" do
101
+ learner = ML::Learner::DecisionStumpLearner.new(4)
102
+
103
+ generator = ML::Data::Generator.new(4, 1, 0.1)
104
+ data = generator.points(10, ML::Data::Generator.generate_vector(4))
105
+
106
+ learner.train! data
107
+ vector = learner.error_vector
108
+ vector.size.should == 4
109
+ end
110
+ end
53
111
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ml
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-10-17 00:00:00.000000000Z
12
+ date: 2011-11-17 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rubyvis
16
- requirement: &2157025440 !ruby/object:Gem::Requirement
16
+ requirement: &2159119320 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *2157025440
24
+ version_requirements: *2159119320
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: nokogiri
27
- requirement: &2157024740 !ruby/object:Gem::Requirement
27
+ requirement: &2159118600 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *2157024740
35
+ version_requirements: *2159118600
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: bacon
38
- requirement: &2157023900 !ruby/object:Gem::Requirement
38
+ requirement: &2159117940 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *2157023900
46
+ version_requirements: *2159117940
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: yard
49
- requirement: &2157022940 !ruby/object:Gem::Requirement
49
+ requirement: &2159117460 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ~>
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: 0.6.0
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *2157022940
57
+ version_requirements: *2159117460
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: bundler
60
- requirement: &2157022300 !ruby/object:Gem::Requirement
60
+ requirement: &2159116740 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 1.0.0
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *2157022300
68
+ version_requirements: *2159116740
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: jeweler
71
- requirement: &2157021360 !ruby/object:Gem::Requirement
71
+ requirement: &2159116240 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ~>
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: 1.6.4
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *2157021360
79
+ version_requirements: *2159116240
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: rcov
82
- requirement: &2157020280 !ruby/object:Gem::Requirement
82
+ requirement: &2159109840 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ! '>='
@@ -87,7 +87,7 @@ dependencies:
87
87
  version: '0'
88
88
  type: :development
89
89
  prerelease: false
90
- version_requirements: *2157020280
90
+ version_requirements: *2159109840
91
91
  description: Machine learning library in Ruby
92
92
  email: andrewliu33@gmail.com
93
93
  executables: []
@@ -106,7 +106,9 @@ files:
106
106
  - lib/data/parser.rb
107
107
  - lib/data/plotter.rb
108
108
  - lib/method/adaptive_perceptron.rb
109
+ - lib/method/decision_stump.rb
109
110
  - lib/method/perceptron.rb
111
+ - lib/method/pocket.rb
110
112
  - lib/ml.rb
111
113
  - ml.gemspec
112
114
  - spec/data_spec.rb
@@ -127,7 +129,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
127
129
  version: '0'
128
130
  segments:
129
131
  - 0
130
- hash: -383772979455407848
132
+ hash: 2827153144592279610
131
133
  required_rubygems_version: !ruby/object:Gem::Requirement
132
134
  none: false
133
135
  requirements: