ml 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -5,6 +5,9 @@ Machine learning library for ruby
5
5
  # Algorithm Implemented
6
6
 
7
7
  * Perceptron Learning Algorithm
8
+ * Adaptive Perceptron (Adaline) Learning Algorithm
9
+ * Pocket Learning Algorithm
10
+ * Decision Stump Learning Algorithm
8
11
 
9
12
  # Tools
10
13
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.0
1
+ 0.3.0
@@ -2,68 +2,28 @@ require 'matrix'
2
2
 
3
3
  module ML
4
4
  module Data
5
- # Generating sample points on 2D plane
6
- class Generator2D
7
- # Generate point from line
8
- #
9
- # @param [Array] coef [a,b,c] for ax+by+c=0
10
- # @param [Number] x x value
11
- # @return [Array] point
12
- def self.point_from_line coef, x
13
- [x, (-coef[2]-(coef[0] * x))/coef[1]]
14
- end
15
-
16
- # Initialize a generator
17
- #
18
- # @param [Integer] x_range x range
19
- # @param [Integer] y_range y range
20
- def initialize x_range = 100, y_range = 100
21
- @x_range = x_range
22
- @y_range = y_range
23
- end
24
-
25
- # Generate two groups of points on 2d plain
26
- #
27
- # @param [Integer] points the number of points of each set
28
- # @param [Array] coef [a,b,c] for ax+by+c=0
29
- # @return [Hash] key: points, value: supervised value
30
- def points_2d points, coef = [-1.0, 1.0, 0.0]
31
- result = {}
32
- # for each group
33
- [1, -1].each do |grp|
34
- points.times do
35
- while true
36
- point = generate_point
37
- prod = Matrix.column_vector(point).transpose * Matrix.column_vector(coef)
38
- if (prod[0,0] <=> 0) == grp
39
- result[point] = grp
40
- break
41
- end
42
- end
43
- end
44
- end
45
- result
46
- end
47
-
48
- private
49
- def generate_point
50
- [@x_range * rand, @y_range * rand, 1.0]
51
- end
52
- end
53
-
54
5
  # General generator for n-dimensional space
55
6
  class Generator
56
7
  # Initial generator
57
8
  #
58
9
  # @param [Integer] dim dimension
59
- def initialize dim
10
+ # @param [Numeric] scale the magnitude of the vector
11
+ # @param [Numeric] noise the percentage of noise
12
+ # @param [Symbol] model the noise model, #:random# for flipping
13
+ # all the element in a probability, while #:flip# only flips a
14
+ # portion of elements randomly
15
+ def initialize dim, scale = 1, noise = 0, model = :random
60
16
  @dim = dim
17
+ @scale = scale
18
+ @noise = noise
19
+ @model = model
61
20
  end
62
21
 
63
22
  # Generate two groups of points
64
23
  #
65
24
  # @param [Integer] points the number of points of each set
66
- # @param [Array] coef array of the size of dimension to specify the hyper plane
25
+ # @param [Array] coef array of the size of dimension to specify the
26
+ # hyper plane
67
27
  # @return [Hash] key: points, value: supervised value
68
28
  def points points, coef
69
29
  result = {}
@@ -71,27 +31,79 @@ module ML
71
31
  [1, -1].each do |grp|
72
32
  points.times do
73
33
  while true
74
- point = Generator.generate_vector(@dim, 100)
34
+ point = generate_vector
75
35
  prod = Matrix.column_vector(point).transpose * Matrix.column_vector(coef)
76
36
  if (prod[0,0] <=> 0) == grp
77
37
  result[point] = grp
38
+ result[point] *= -1 if @model == :random and rand < @noise
78
39
  break
79
40
  end
80
41
  end
81
42
  end
82
43
  end
44
+
45
+ if @model == :flip and @noise > 0
46
+ flipping = (points * @noise * 2).to_i
47
+ order = (0...(points * 2)).to_a.shuffle
48
+ for i in 0...flipping
49
+ result[result.keys[order[i]]] *= -1
50
+ end
51
+ end
52
+
83
53
  result
84
54
  end
85
55
 
86
56
  # Generating a random vector
87
57
  #
88
58
  # @param [Integer] dim the dimension of the vector
89
- # @param [Integer] scale the scale of each component
59
+ # @param [Integer] scale the scale of each component (default [-1,1])
90
60
  # @return [Array] random vector
91
61
  def self.generate_vector dim, scale = 1
92
- result = Array.new(dim) { (rand - 0.5) * scale }
62
+ result = Array.new(dim) { (rand - 0.5) * 2 * scale }
93
63
  result << 1.0
94
64
  end
65
+
66
+ protected
67
+ def generate_vector
68
+ Generator.generate_vector @dim, @scale
69
+ end
70
+ end
71
+
# Generating sample points on 2D plane
#
# Specialises Generator: points are drawn from the rectangle
# [0, x_range) x [0, y_range) and carry a trailing 1.0 so they can be
# multiplied with an [a, b, c] line coefficient.
class Generator2D < Generator
  # Generate point from line
  #
  # Solves ax+by+c=0 for y at the given x. The arithmetic is done with
  # whatever numeric types are passed in, so all-Integer input uses
  # Integer division.
  #
  # @param [Array] coef [a,b,c] for ax+by+c=0
  # @param [Number] x x value
  # @return [Array] point [x, y]
  def self.point_from_line coef, x
    [x, (-coef[2] - (coef[0] * x)) / coef[1]]
  end

  # Initialize a generator
  #
  # The noise settings are forwarded to Generator#initialize; previously
  # the noise model was left unset here, so the noise argument was
  # silently ignored by the inherited #points.
  #
  # @param [Integer] x_range x range
  # @param [Integer] y_range y range
  # @param [Numeric] noise the percentage of noise
  # @param [Symbol] model the noise model (:random or :flip), see Generator
  def initialize x_range = 100, y_range = 100, noise = 0, model = :random
    # dimension 2, unit scale; the scale is not used because
    # generate_vector is overridden below
    super(2, 1, noise, model)
    @x_range = x_range
    @y_range = y_range
  end

  # Generate two groups of points on 2d plane
  #
  # @param [Integer] points the number of points of each set
  # @param [Array] coef [a,b,c] for ax+by+c=0
  # @return [Hash] key: points, value: supervised value
  def points_2d points, coef = [-1.0, 1.0, 0.0]
    points(points, coef)
  end

  protected

  # Draw one augmented point [x, y, 1.0] inside the configured rectangle.
  def generate_vector
    [@x_range * rand, @y_range * rand, 1.0]
  end
end
96
108
  end
97
109
  end
@@ -0,0 +1,108 @@
module ML
  module Learner
    # Implementation of decision stump learning
    #
    # A stump is the hypothesis h_{s,i,t}(x) = s * sign((x)_i - t), where
    # sign is +1 for a strictly positive argument and -1 otherwise.
    class DecisionStumpLearner
      # Initialize a decision stump learner
      #
      # @param [Integer] dim dimension
      def initialize dim
        @dim = dim
        @min_error = 1.0 / 0 # +Infinity, so the first stump always wins
        @error_vector = []
      end

      # Train with a supervised data
      #
      # Searches every dimension for its best stump and keeps the overall
      # best one. State is kept between calls, so a later call only
      # replaces the hypothesis when it achieves a strictly lower error.
      #
      # @param [Hash] data supervised input data (mapping from array to integer)
      # @return [Hash] {error} error of the training data
      # @raise [ArgumentError] if data is empty
      def train! data
        (0...@dim).each do |i|
          hypo, error = search data, i
          update_hypo hypo, error
          @error_vector[i] = error
        end

        {:error => @min_error}
      end

      # Predict certain data
      #
      # @param [Array] data data in question
      # @return [Integer] prediction
      def predict data
        classify data, @best_hypo
      end

      # Error vector of each dimension
      #
      # @return [Array] the error vector
      def error_vector
        @error_vector
      end

      # Get the hypothesis vector
      #
      # Format of hypothesis vector
      # h_{s,i,t}(x) = s sign((x)_i - t)
      #
      # @return [Array] [s, i, t] vector
      def hypothesis
        @best_hypo
      end

      private

      # Apply the stump hypo = [s, i, t] to one data point.
      def classify data, hypo
        sign = (data[hypo[1]] - hypo[2] > 0) ? 1 : -1
        hypo[0] * sign
      end

      # Keep the hypothesis only if it beats the best error seen so far.
      def update_hypo hypo, error
        return unless error < @min_error

        @best_hypo = hypo
        @min_error = error
      end

      # Find the minimum-error stump along one dimension.
      #
      # Every split "after the i-th sorted point" is a candidate, except
      # splits between two equal feature values (a threshold cannot
      # separate them). With diff = #positive - #negative among the points
      # at or below the split:
      #   s = -1 misclassifies total_pos - diff points,
      #   s = +1 misclassifies total_neg + diff points.
      # Both are evaluated, so the result is also optimal for unbalanced
      # classes. A split after the last point covers the constant
      # classifiers.
      #
      # @return [Array] [hypothesis, error]
      def search data, dim
        pool = data.to_a.sort_by { |point, _label| point[dim] }
        raise ArgumentError, 'training data must not be empty' if pool.empty?

        total_pos = pool.count { |_point, label| label == 1 }
        total_neg = pool.size - total_pos
        last = pool.size - 1

        best_error, best_sign, best_index = nil, nil, nil
        diff = 0

        pool.each_with_index do |(point, label), i|
          diff += (label == 1) ? 1 : -1
          next if i < last && pool[i + 1][0][dim] == point[dim]

          [[-1, total_pos - diff], [1, total_neg + diff]].each do |sign, err|
            if best_error.nil? || err < best_error
              best_error, best_sign, best_index = err, sign, i
            end
          end
        end

        thres = if best_index == last
          # place the threshold just above the largest value
          pool[-1][0][dim] + 0.01
        else
          (pool[best_index][0][dim] + pool[best_index + 1][0][dim]) / 2.0
        end
        hypo = [best_sign, dim, thres]

        # recount with the real classifier so the reported error always
        # matches what #predict would do
        [hypo, classify_error(pool, hypo)]
      end

      # Count the points whose prediction differs from the supervised value.
      def classify_error data, hypo
        data.count { |point, label| classify(point, hypo) != label }
      end
    end
  end
end
@@ -7,7 +7,7 @@ module ML
7
7
  # Initialize a perceptron learner
8
8
  #
9
9
  # @param [Integer] dim the number of dimension
10
- def initialize dim, thres = 1.0/0
10
+ def initialize dim
11
11
  @dim = dim
12
12
  @w = Matrix.column_vector(Array.new(dim + 1, 0))
13
13
  end
@@ -16,16 +16,15 @@ module ML
16
16
  #
17
17
  # @param [Hash] data supervised input data (mapping from array to integer)
18
18
  # @param [Numeric] threshold the upper bound of the training iteration
19
- # @return [Array] error_and_update [error, update] error in traning and update numbers used
19
+ # @return [Hash] {error, update_count} error in training and update numbers used
20
20
  def train! data, threshold = 1.0/0
21
21
  pool = data.to_a
22
22
  update = 0
23
- error = 0
24
23
 
25
24
  while true
26
25
  break if update >= threshold
27
26
  misclassified = false
28
- order = (1...(pool.size)).to_a.shuffle
27
+ order = (0...(pool.size)).to_a.shuffle
29
28
 
30
29
  for i in order
31
30
  dat, result = pool[i]
@@ -44,14 +43,13 @@ module ML
44
43
  end
45
44
 
46
45
  # check out errors
47
- if update >= threshold
48
- for dat, result in pool
49
- classified_result = (classify(Matrix.column_vector(dat)) <=> 0)
50
- error += 1 unless result == classified_result
51
- end
52
- end
46
+ error = if update >= threshold
47
+ classify_error pool
48
+ else
49
+ 0
50
+ end
53
51
 
54
- [error, update]
52
+ {:error => error, :update_count => update}
55
53
  end
56
54
 
57
55
  # The final coefficient of the line
@@ -81,6 +79,17 @@ module ML
81
79
  def update_vector x, y
82
80
  @w = @w + y * x
83
81
  end
82
+
83
+ def classify_error supervised_data
84
+ error = 0
85
+
86
+ for data, result in supervised_data
87
+ classified_result = (classify(Matrix.column_vector(data)) <=> 0)
88
+ error += 1 unless result == classified_result
89
+ end
90
+
91
+ error
92
+ end
84
93
  end
85
94
  end
86
95
  end
@@ -0,0 +1,38 @@
module ML
  module Learner
    # Implementation of pocket learning algorithm
    #
    # Runs perceptron updates but keeps ("pockets") the weight vector with
    # the lowest training error seen so far, so it also gives a usable
    # result on data that is not linearly separable.
    class PocketLearner < PerceptronLearner
      # Train with supervised data
      #
      # Each iteration picks one misclassified sample in random order,
      # applies a single perceptron update and re-evaluates the training
      # error. When training ends the learner's weights are replaced by
      # the pocketed (best) weights.
      #
      # Relies on wrongly_classify, update_vector and classify_error from
      # PerceptronLearner.
      #
      # @param [Hash] data supervised input data (mapping from array to integer)
      # @param [Integer] iteration the number of the iterations
      # @return [Hash] {error, update_count} error of the pocketed weights
      #   on the training data and the number of updates performed
      def train! data, iteration
        pool = data.to_a
        update_count = 0

        # the starting weights are the first pocket candidate
        best_error = classify_error pool
        pocket = @w.dup

        iteration.times do
          break if best_error == 0

          # the random order; starts at 0 so that no sample is skipped
          order = (0...pool.size).to_a.shuffle
          target = order.find do |i|
            wrongly_classify(Matrix.column_vector(pool[i][0]), pool[i][1])
          end
          # nothing left to correct: further iterations cannot change @w
          break if target.nil?

          dat, result = pool[target]
          update_vector Matrix.column_vector(dat), result
          update_count += 1

          # update pocket
          error = classify_error pool
          if error < best_error
            best_error = error
            pocket = @w.dup
          end
        end

        # hand back the best weights, not the last ones
        @w = pocket
        {:error => best_error, :update_count => update_count}
      end
    end
  end
end
data/lib/ml.rb CHANGED
@@ -7,6 +7,8 @@ require 'data/parser'
7
7
 
8
8
  require 'method/perceptron'
9
9
  require 'method/adaptive_perceptron'
10
+ require 'method/pocket'
11
+ require 'method/decision_stump'
10
12
 
11
13
  # Top namespace for machine learning algorithms
12
14
  module ML
@@ -19,4 +21,4 @@ module ML
19
21
  end
20
22
  end
21
23
 
22
- MachingLearning = ML
24
+ MachineLearning = ML
data/ml.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "ml"
8
- s.version = "0.2.0"
8
+ s.version = "0.3.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Andrew Liu"]
12
- s.date = "2011-10-17"
12
+ s.date = "2011-11-17"
13
13
  s.description = "Machine learning library in Ruby"
14
14
  s.email = "andrewliu33@gmail.com"
15
15
  s.extra_rdoc_files = [
@@ -27,7 +27,9 @@ Gem::Specification.new do |s|
27
27
  "lib/data/parser.rb",
28
28
  "lib/data/plotter.rb",
29
29
  "lib/method/adaptive_perceptron.rb",
30
+ "lib/method/decision_stump.rb",
30
31
  "lib/method/perceptron.rb",
32
+ "lib/method/pocket.rb",
31
33
  "lib/ml.rb",
32
34
  "ml.gemspec",
33
35
  "spec/data_spec.rb",
@@ -8,7 +8,9 @@ describe "Learner" do
8
8
  generator = ML::Data::Generator2D.new
9
9
  data = generator.points_2d(10)
10
10
 
11
- error, update_count = learner.train! data
11
+ response = learner.train! data
12
+ error = response[:error]
13
+ update_count = response[:update_count]
12
14
 
13
15
  line = learner.line
14
16
  line.should.kind_of?(Array).should == true
@@ -23,7 +25,9 @@ describe "Learner" do
23
25
  generator = ML::Data::Generator.new(4)
24
26
  data = generator.points(10, ML::Data::Generator.generate_vector(4))
25
27
 
26
- error, update_count = learner.train! data
28
+ response = learner.train! data
29
+ error = response[:error]
30
+ update_count = response[:update_count]
27
31
 
28
32
  line = learner.line
29
33
  line.should.kind_of?(Array).should == true
@@ -37,10 +41,12 @@ describe "Learner" do
37
41
  it "should run adaptive perceptron learning in hyperspace" do
38
42
  learner = ML::Learner::AdaptivePerceptronLearner.new(4, 0.1)
39
43
 
40
- generator = ML::Data::Generator.new(4)
44
+ generator = ML::Data::Generator.new(4, 100)
41
45
  data = generator.points(10, ML::Data::Generator.generate_vector(4))
42
46
 
43
- error, update_count = learner.train! data, 1000
47
+ response = learner.train! data, 1000
48
+ error = response[:error]
49
+ update_count = response[:update_count]
44
50
 
45
51
  line = learner.line
46
52
  line.should.kind_of?(Array).should == true
@@ -50,4 +56,56 @@ describe "Learner" do
50
56
  update_count.should < 1000
51
57
  end
52
58
  end
59
+
60
+ describe "Pocket Learner" do
61
+ it "should run pocket perceptron learning in hyperspace" do
62
+ learner = ML::Learner::PocketLearner.new(4)
63
+
64
+ generator = ML::Data::Generator.new(4)
65
+ data = generator.points(10, ML::Data::Generator.generate_vector(4))
66
+
67
+ learner.train! data, 1000
68
+
69
+ line = learner.line
70
+ line.should.kind_of?(Array).should == true
71
+ line.size.should == 5
72
+ end
73
+
74
+ it "should run pocket perceptron learning in noisy data" do
75
+ learner = ML::Learner::PocketLearner.new(4)
76
+
77
+ generator = ML::Data::Generator.new(4, 1, 0.1)
78
+ data = generator.points(10, ML::Data::Generator.generate_vector(4))
79
+
80
+ learner.train! data, 1000
81
+
82
+ line = learner.line
83
+ line.should.kind_of?(Array).should == true
84
+ line.size.should == 5
85
+ end
86
+ end
87
+
88
+ describe "Decision Stump Learner" do
89
+ it "should run decision stump learning in hyperspace" do
90
+ learner = ML::Learner::DecisionStumpLearner.new(4)
91
+
92
+ generator = ML::Data::Generator.new(4)
93
+ data = generator.points(10, ML::Data::Generator.generate_vector(4))
94
+
95
+ learner.train! data
96
+ vector = learner.error_vector
97
+ vector.size.should == 4
98
+ end
99
+
100
+ it "should run decision stump learning in noisy data" do
101
+ learner = ML::Learner::DecisionStumpLearner.new(4)
102
+
103
+ generator = ML::Data::Generator.new(4, 1, 0.1)
104
+ data = generator.points(10, ML::Data::Generator.generate_vector(4))
105
+
106
+ learner.train! data
107
+ vector = learner.error_vector
108
+ vector.size.should == 4
109
+ end
110
+ end
53
111
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ml
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-10-17 00:00:00.000000000Z
12
+ date: 2011-11-17 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rubyvis
16
- requirement: &2157025440 !ruby/object:Gem::Requirement
16
+ requirement: &2159119320 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *2157025440
24
+ version_requirements: *2159119320
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: nokogiri
27
- requirement: &2157024740 !ruby/object:Gem::Requirement
27
+ requirement: &2159118600 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *2157024740
35
+ version_requirements: *2159118600
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: bacon
38
- requirement: &2157023900 !ruby/object:Gem::Requirement
38
+ requirement: &2159117940 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *2157023900
46
+ version_requirements: *2159117940
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: yard
49
- requirement: &2157022940 !ruby/object:Gem::Requirement
49
+ requirement: &2159117460 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ~>
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: 0.6.0
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *2157022940
57
+ version_requirements: *2159117460
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: bundler
60
- requirement: &2157022300 !ruby/object:Gem::Requirement
60
+ requirement: &2159116740 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 1.0.0
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *2157022300
68
+ version_requirements: *2159116740
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: jeweler
71
- requirement: &2157021360 !ruby/object:Gem::Requirement
71
+ requirement: &2159116240 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ~>
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: 1.6.4
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *2157021360
79
+ version_requirements: *2159116240
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: rcov
82
- requirement: &2157020280 !ruby/object:Gem::Requirement
82
+ requirement: &2159109840 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ! '>='
@@ -87,7 +87,7 @@ dependencies:
87
87
  version: '0'
88
88
  type: :development
89
89
  prerelease: false
90
- version_requirements: *2157020280
90
+ version_requirements: *2159109840
91
91
  description: Machine learning library in Ruby
92
92
  email: andrewliu33@gmail.com
93
93
  executables: []
@@ -106,7 +106,9 @@ files:
106
106
  - lib/data/parser.rb
107
107
  - lib/data/plotter.rb
108
108
  - lib/method/adaptive_perceptron.rb
109
+ - lib/method/decision_stump.rb
109
110
  - lib/method/perceptron.rb
111
+ - lib/method/pocket.rb
110
112
  - lib/ml.rb
111
113
  - ml.gemspec
112
114
  - spec/data_spec.rb
@@ -127,7 +129,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
127
129
  version: '0'
128
130
  segments:
129
131
  - 0
130
- hash: -383772979455407848
132
+ hash: 2827153144592279610
131
133
  required_rubygems_version: !ruby/object:Gem::Requirement
132
134
  none: false
133
135
  requirements: