som 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -12,8 +12,10 @@ A pure Ruby implementation of the Self Organising Map machine learning Algorithm
12
12
  require 'rubygems'
13
13
  require 'som'
14
14
 
15
- a = SOM.new(:number_of_nodes => 4, :dimensions => 3)
16
- a.train(data)
15
+ data = [[1,2,3], [4,5,6], ...]
16
+
17
+ a = SOM.new(data, :nodes => 4, :dimensions => 3)
18
+ a.train
17
19
 
18
20
  # Returns the index of the data you gave it
19
21
  a.inspect
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.1
1
+ 0.0.2
data/examples/example.rb CHANGED
@@ -1,10 +1,30 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/som_data')
1
2
  require File.expand_path(File.dirname(__FILE__) + '/../lib/som')
3
+ require 'benchmark'
2
4
 
3
- data = Array.new(100) {Array.new(3) {rand}}
5
+ require 'rubygems'
6
+ require 'normalizer'
4
7
 
5
- a = SOM.new(:number_of_nodes => 4, :dimensions => 3)
8
+ min, max = Normalizer.find_min_and_max(SOM_DATA)
6
9
 
7
- a.train(data)
10
+ normalizer = Normalizer.new(:min => min, :max => max)
8
11
 
9
- # Returns the index of the data you gave it
10
- puts a.inspect.inspect
12
+ data = []
13
+
14
+ SOM_DATA.each do |n|
15
+ data << normalizer.normalize(n)
16
+ end
17
+
18
+ a = SOM.new(data, :nodes => 10, :dimensions => data[0].size)
19
+
20
+ puts a.nodes.inspect
21
+
22
+ puts a.global_distance_error
23
+
24
+ times = Benchmark.measure do
25
+ a.train
26
+ end
27
+
28
+ puts a.global_distance_error
29
+
30
+ puts times
@@ -0,0 +1,158 @@
1
+ # Taken from AI4R Gem
2
+
3
+ # data is from the iris dataset (http://archive.ics.uci.edu/ml/datasets/Iris)
4
+ # it is the full dataset, removing the last column
5
+ # website provides additional information on the dataset itself (attributes, class distribution, etc)
6
+
7
+ SOM_DATA = [
8
+ [5.1, 3.5, 1.4, 0.2],
9
+ [4.9, 3.0, 1.4, 0.2],
10
+ [4.7, 3.2, 1.3, 0.2],
11
+ [4.6, 3.1, 1.5, 0.2],
12
+ [5.0, 3.6, 1.4, 0.2],
13
+ [5.4, 3.9, 1.7, 0.4],
14
+ [4.6, 3.4, 1.4, 0.3],
15
+ [5.0, 3.4, 1.5, 0.2],
16
+ [4.4, 2.9, 1.4, 0.2],
17
+ [4.9, 3.1, 1.5, 0.1],
18
+ [5.4, 3.7, 1.5, 0.2],
19
+ [4.8, 3.4, 1.6, 0.2],
20
+ [4.8, 3.0, 1.4, 0.1],
21
+ [4.3, 3.0, 1.1, 0.1],
22
+ [5.8, 4.0, 1.2, 0.2],
23
+ [5.7, 4.4, 1.5, 0.4],
24
+ [5.4, 3.9, 1.3, 0.4],
25
+ [5.1, 3.5, 1.4, 0.3],
26
+ [5.7, 3.8, 1.7, 0.3],
27
+ [5.1, 3.8, 1.5, 0.3],
28
+ [5.4, 3.4, 1.7, 0.2],
29
+ [5.1, 3.7, 1.5, 0.4],
30
+ [4.6, 3.6, 1.0, 0.2],
31
+ [5.1, 3.3, 1.7, 0.5],
32
+ [4.8, 3.4, 1.9, 0.2],
33
+ [5.0, 3.0, 1.6, 0.2],
34
+ [5.0, 3.4, 1.6, 0.4],
35
+ [5.2, 3.5, 1.5, 0.2],
36
+ [5.2, 3.4, 1.4, 0.2],
37
+ [4.7, 3.2, 1.6, 0.2],
38
+ [4.8, 3.1, 1.6, 0.2],
39
+ [5.4, 3.4, 1.5, 0.4],
40
+ [5.2, 4.1, 1.5, 0.1],
41
+ [5.5, 4.2, 1.4, 0.2],
42
+ [4.9, 3.1, 1.5, 0.1],
43
+ [5.0, 3.2, 1.2, 0.2],
44
+ [5.5, 3.5, 1.3, 0.2],
45
+ [4.9, 3.1, 1.5, 0.1],
46
+ [4.4, 3.0, 1.3, 0.2],
47
+ [5.1, 3.4, 1.5, 0.2],
48
+ [5.0, 3.5, 1.3, 0.3],
49
+ [4.5, 2.3, 1.3, 0.3],
50
+ [4.4, 3.2, 1.3, 0.2],
51
+ [5.0, 3.5, 1.6, 0.6],
52
+ [5.1, 3.8, 1.9, 0.4],
53
+ [4.8, 3.0, 1.4, 0.3],
54
+ [5.1, 3.8, 1.6, 0.2],
55
+ [4.6, 3.2, 1.4, 0.2],
56
+ [5.3, 3.7, 1.5, 0.2],
57
+ [5.0, 3.3, 1.4, 0.2],
58
+ [7.0, 3.2, 4.7, 1.4],
59
+ [6.4, 3.2, 4.5, 1.5],
60
+ [6.9, 3.1, 4.9, 1.5],
61
+ [5.5, 2.3, 4.0, 1.3],
62
+ [6.5, 2.8, 4.6, 1.5],
63
+ [5.7, 2.8, 4.5, 1.3],
64
+ [6.3, 3.3, 4.7, 1.6],
65
+ [4.9, 2.4, 3.3, 1.0],
66
+ [6.6, 2.9, 4.6, 1.3],
67
+ [5.2, 2.7, 3.9, 1.4],
68
+ [5.0, 2.0, 3.5, 1.0],
69
+ [5.9, 3.0, 4.2, 1.5],
70
+ [6.0, 2.2, 4.0, 1.0],
71
+ [6.1, 2.9, 4.7, 1.4],
72
+ [5.6, 2.9, 3.6, 1.3],
73
+ [6.7, 3.1, 4.4, 1.4],
74
+ [5.6, 3.0, 4.5, 1.5],
75
+ [5.8, 2.7, 4.1, 1.0],
76
+ [6.2, 2.2, 4.5, 1.5],
77
+ [5.6, 2.5, 3.9, 1.1],
78
+ [5.9, 3.2, 4.8, 1.8],
79
+ [6.1, 2.8, 4.0, 1.3],
80
+ [6.3, 2.5, 4.9, 1.5],
81
+ [6.1, 2.8, 4.7, 1.2],
82
+ [6.4, 2.9, 4.3, 1.3],
83
+ [6.6, 3.0, 4.4, 1.4],
84
+ [6.8, 2.8, 4.8, 1.4],
85
+ [6.7, 3.0, 5.0, 1.7],
86
+ [6.0, 2.9, 4.5, 1.5],
87
+ [5.7, 2.6, 3.5, 1.0],
88
+ [5.5, 2.4, 3.8, 1.1],
89
+ [5.5, 2.4, 3.7, 1.0],
90
+ [5.8, 2.7, 3.9, 1.2],
91
+ [6.0, 2.7, 5.1, 1.6],
92
+ [5.4, 3.0, 4.5, 1.5],
93
+ [6.0, 3.4, 4.5, 1.6],
94
+ [6.7, 3.1, 4.7, 1.5],
95
+ [6.3, 2.3, 4.4, 1.3],
96
+ [5.6, 3.0, 4.1, 1.3],
97
+ [5.5, 2.5, 4.0, 1.3],
98
+ [5.5, 2.6, 4.4, 1.2],
99
+ [6.1, 3.0, 4.6, 1.4],
100
+ [5.8, 2.6, 4.0, 1.2],
101
+ [5.0, 2.3, 3.3, 1.0],
102
+ [5.6, 2.7, 4.2, 1.3],
103
+ [5.7, 3.0, 4.2, 1.2],
104
+ [5.7, 2.9, 4.2, 1.3],
105
+ [6.2, 2.9, 4.3, 1.3],
106
+ [5.1, 2.5, 3.0, 1.1],
107
+ [5.7, 2.8, 4.1, 1.3],
108
+ [6.3, 3.3, 6.0, 2.5],
109
+ [5.8, 2.7, 5.1, 1.9],
110
+ [7.1, 3.0, 5.9, 2.1],
111
+ [6.3, 2.9, 5.6, 1.8],
112
+ [6.5, 3.0, 5.8, 2.2],
113
+ [7.6, 3.0, 6.6, 2.1],
114
+ [4.9, 2.5, 4.5, 1.7],
115
+ [7.3, 2.9, 6.3, 1.8],
116
+ [6.7, 2.5, 5.8, 1.8],
117
+ [7.2, 3.6, 6.1, 2.5],
118
+ [6.5, 3.2, 5.1, 2.0],
119
+ [6.4, 2.7, 5.3, 1.9],
120
+ [6.8, 3.0, 5.5, 2.1],
121
+ [5.7, 2.5, 5.0, 2.0],
122
+ [5.8, 2.8, 5.1, 2.4],
123
+ [6.4, 3.2, 5.3, 2.3],
124
+ [6.5, 3.0, 5.5, 1.8],
125
+ [7.7, 3.8, 6.7, 2.2],
126
+ [7.7, 2.6, 6.9, 2.3],
127
+ [6.0, 2.2, 5.0, 1.5],
128
+ [6.9, 3.2, 5.7, 2.3],
129
+ [5.6, 2.8, 4.9, 2.0],
130
+ [7.7, 2.8, 6.7, 2.0],
131
+ [6.3, 2.7, 4.9, 1.8],
132
+ [6.7, 3.3, 5.7, 2.1],
133
+ [7.2, 3.2, 6.0, 1.8],
134
+ [6.2, 2.8, 4.8, 1.8],
135
+ [6.1, 3.0, 4.9, 1.8],
136
+ [6.4, 2.8, 5.6, 2.1],
137
+ [7.2, 3.0, 5.8, 1.6],
138
+ [7.4, 2.8, 6.1, 1.9],
139
+ [7.9, 3.8, 6.4, 2.0],
140
+ [6.4, 2.8, 5.6, 2.2],
141
+ [6.3, 2.8, 5.1, 1.5],
142
+ [6.1, 2.6, 5.6, 1.4],
143
+ [7.7, 3.0, 6.1, 2.3],
144
+ [6.3, 3.4, 5.6, 2.4],
145
+ [6.4, 3.1, 5.5, 1.8],
146
+ [6.0, 3.0, 4.8, 1.8],
147
+ [6.9, 3.1, 5.4, 2.1],
148
+ [6.7, 3.1, 5.6, 2.4],
149
+ [6.9, 3.1, 5.1, 2.3],
150
+ [5.8, 2.7, 5.1, 1.9],
151
+ [6.8, 3.2, 5.9, 2.3],
152
+ [6.7, 3.3, 5.7, 2.5],
153
+ [6.7, 3.0, 5.2, 2.3],
154
+ [6.3, 2.5, 5.0, 1.9],
155
+ [6.5, 3.0, 5.2, 2.0],
156
+ [6.2, 3.4, 5.4, 2.3],
157
+ [5.9, 3.0, 5.1, 1.8],
158
+ ]
data/lib/som/node.rb CHANGED
@@ -34,8 +34,8 @@ class Node
34
34
  private
35
35
 
36
36
  def create_weights(number_of_weights)
37
- number_of_weights.times do
38
- weights << (rand > 0.5 ? -rand : rand)
37
+ number_of_weights.times do |n|
38
+ weights << rand
39
39
  end
40
40
  end
41
41
 
data/lib/som.rb CHANGED
@@ -2,28 +2,30 @@ require File.expand_path(File.dirname(__FILE__) + '/som/node')
2
2
 
3
3
  class SOM
4
4
 
5
- def initialize(options={})
5
+ def initialize(training_data, options={})
6
+ @training_data = training_data
6
7
  @number_of_nodes = options[:nodes] || 5
7
8
  @dimensions = options[:dimensions]
8
9
  @learning_rate = options[:learning_rate] || 0.5
9
10
  @radius = options[:radius] || @number_of_nodes / 2
10
- @iteration_count = 0
11
- @max_iterations = options[:max_iterations] || 500
11
+ @iteration_count = 1
12
+ @max_iterations = options[:max_iterations] || 100
12
13
  # TODO: Allow a lambda so we can use different neighborhood functions
13
14
  @neighborhood_function = options[:neighborhood_function] || 1
14
- create_nodes
15
+
16
+ create_nodes(training_data)
15
17
  end
16
18
 
17
19
  def nodes
18
20
  @nodes ||= []
19
21
  end
20
22
 
21
- def train(data)
22
- while train_it!(data)
23
+ def train
24
+ while train_it!(@training_data)
23
25
  end
24
26
  # Place the data in the nodes buckets so we can see how
25
27
  # The data has been clustered
26
- place_data_into_buckets(data)
28
+ place_data_into_buckets(@training_data)
27
29
  end
28
30
 
29
31
  # Returns an array of buckets containing the index of the data given
@@ -41,11 +43,18 @@ class SOM
41
43
  closest_node.bucket
42
44
  end
43
45
 
46
+ # Taken from AI4R SOM library #107
47
+ def global_distance_error
48
+ @training_data.inject(0) do |sum, n|
49
+ sum + find_closest(n)[1]
50
+ end
51
+ end
52
+
44
53
  private
45
54
 
46
55
  def train_it!(data)
47
56
  return false if @iteration_count >= @max_iterations
48
-
57
+
49
58
  data.each do |input|
50
59
  # Update closest node
51
60
  closest_node = find_closest_node(input)
@@ -54,14 +63,12 @@ class SOM
54
63
  # Update nodes that are closer than the radius
55
64
  other_nodes = nodes - [closest_node]
56
65
  other_nodes.each do |node|
57
- next if @radius > node.distance_from(closest_node.weights)
66
+ next if decayed_radius > node.distance_from(closest_node.weights)
58
67
 
59
68
  node.update_weight(@learning_rate, input, neighborhood_function)
60
69
  end
61
70
  end
62
71
 
63
- decrease_radius!
64
- decrease_learning_rate!
65
72
  increase_iteration_count!
66
73
  end
67
74
 
@@ -72,14 +79,14 @@ class SOM
72
79
  end
73
80
  end
74
81
 
75
- def decrease_radius!
76
- @radius = 0.5 * @radius * @iteration_count / @max_iterations
82
+ def decayed_radius
83
+ @radius - (0.1 * @radius * @iteration_count / @max_iterations)
77
84
  end
78
85
 
79
- def decrease_learning_rate!
80
- @learning_rate = 0.5 * @learning_rate * @iteration_count / @max_iterations
86
+ def decayed_learning_rate
87
+ @learning_rate - (0.5 * @learning_rate * @iteration_count / @max_iterations)
81
88
  end
82
-
89
+
83
90
  def increase_iteration_count!
84
91
  @iteration_count += 1
85
92
  end
@@ -89,6 +96,10 @@ class SOM
89
96
  end
90
97
 
91
98
  def find_closest_node(data)
99
+ find_closest(data)[0]
100
+ end
101
+
102
+ def find_closest(data)
92
103
  closest_node = [nodes[0], nodes[0].distance_from(data)]
93
104
 
94
105
  nodes[1..-1].each do |node|
@@ -97,10 +108,10 @@ class SOM
97
108
  closest_node = [node, distance]
98
109
  end
99
110
  end
100
- closest_node[0]
111
+ closest_node
101
112
  end
102
-
103
- def create_nodes
113
+
114
+ def create_nodes(data)
104
115
  @number_of_nodes.times { nodes << Node.new(@dimensions) }
105
116
  end
106
117
 
data/som.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{som}
8
- s.version = "0.0.1"
8
+ s.version = "0.0.2"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["reddavis"]
12
- s.date = %q{2009-11-26}
12
+ s.date = %q{2009-11-29}
13
13
  s.description = %q{A Self Organising Map}
14
14
  s.email = %q{reddavis@gmail.com}
15
15
  s.extra_rdoc_files = [
@@ -24,6 +24,7 @@ Gem::Specification.new do |s|
24
24
  "Rakefile",
25
25
  "VERSION",
26
26
  "examples/example.rb",
27
+ "examples/som_data.rb",
27
28
  "lib/som.rb",
28
29
  "lib/som/node.rb",
29
30
  "som.gemspec",
@@ -41,7 +42,8 @@ Gem::Specification.new do |s|
41
42
  "spec/node_spec.rb",
42
43
  "spec/som_spec.rb",
43
44
  "spec/spec_helper.rb",
44
- "examples/example.rb"
45
+ "examples/example.rb",
46
+ "examples/som_data.rb"
45
47
  ]
46
48
 
47
49
  if s.respond_to? :specification_version then
data/spec/som_spec.rb CHANGED
@@ -1,54 +1,43 @@
1
1
  require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
2
 
3
- describe "Som" do
4
- describe "Initialization" do
5
- before do
6
- @a = SOM.new(:nodes => 10, :dimensions => 5)
7
- end
8
-
9
- it "should have 10 nodes" do
10
- @a.nodes.size.should == 10
11
- end
12
- end
13
-
3
+ describe "Som" do
14
4
  describe "Training" do
15
5
  before do
16
- @a = SOM.new(:nodes => 1, :dimensions => 2)
17
- @data = [[2,3]]
6
+ @data = [[0,0], [0,0.5], [2,4], [6,5]]
7
+ @a = SOM.new(@data, :nodes => 1, :dimensions => 2)
18
8
  end
19
9
 
20
10
  it "should change the weight of the best matching node" do
21
11
  before = @a.nodes.map {|x| x.weights.clone}
22
- @a.train(@data)
12
+ @a.train
23
13
  after = @a.nodes.map {|x| x.weights}
24
14
 
25
15
  before.should_not == after
26
16
  end
27
17
 
28
- it "should will the nodes bucket with the data" do
29
- @a.train(@data)
18
+ it "should fill the nodes bucket with the data" do
19
+ @a.train
30
20
  @a.nodes[0].bucket.should_not be_empty
31
21
  end
32
22
 
33
23
  it "should preserve data indexes" do
34
- data = [[0,0], [0,0.5], [2,4], [6,5]]
35
- @a.train(data)
24
+ @a.train
36
25
 
37
26
  index_returned = @a.nodes[0].bucket[0][0]
38
27
  data_returned = @a.nodes[0].bucket[0][1]
39
28
 
40
- data[index_returned].should == data_returned
29
+ @data[index_returned].should == data_returned
41
30
  end
42
31
  end
43
32
 
44
33
  describe "Inspect" do
45
34
  before do
46
- @a = SOM.new(:nodes => 1, :dimensions => 2)
47
- @data = [[2,3]]
35
+ data = [[2,3]]
36
+ @a = SOM.new(data, :nodes => 1, :dimensions => 2)
48
37
  end
49
38
 
50
39
  it "should show the clusters of data indexes" do
51
- @a.train(@data)
40
+ @a.train
52
41
  @a.inspect.should be_an(Array)
53
42
  @a.inspect.size.should == 1
54
43
  end
@@ -56,12 +45,12 @@ describe "Som" do
56
45
 
57
46
  describe "Clustering" do
58
47
  before do
59
- @a = SOM.new(:nodes => 2, :dimensions => 2)
48
+ data = [[0,0], [999,999]]
49
+ @a = SOM.new(data, :nodes => 2, :dimensions => 2)
60
50
  end
61
51
 
62
52
  it "should belong to 2 seperate nodes" do
63
- data = [[0,0], [999,999]]
64
- @a.train(data)
53
+ @a.train
65
54
  @a.inspect[0].should_not be_empty
66
55
  @a.inspect[1].should_not be_empty
67
56
  end
@@ -69,14 +58,25 @@ describe "Som" do
69
58
 
70
59
  describe "Classify" do
71
60
  before do
72
- @a = SOM.new(:nodes => 2, :dimensions => 2)
61
+ data = [[0,0], [999,999]]
62
+ @a = SOM.new(data, :nodes => 2, :dimensions => 2)
73
63
  end
74
64
 
75
65
  it "should belong to 2 seperate nodes" do
76
- data = [[0,0], [999,999]]
77
- @a.train(data)
66
+ @a.train
78
67
  @a.classify([1,1]).should be_an(Array)
79
68
  @a.classify([1,1]).size.should == 1
80
69
  end
81
70
  end
71
+
72
+ describe "Global Distance Error" do
73
+ before do
74
+ data = [[0,0], [999,999]]
75
+ @a = SOM.new(data, :nodes => 2, :dimensions => 2)
76
+ end
77
+
78
+ it "should return an integer" do
79
+ @a.global_distance_error.should be_a(Float)
80
+ end
81
+ end
82
82
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: som
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - reddavis
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-11-26 00:00:00 +00:00
12
+ date: 2009-11-29 00:00:00 +00:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -39,6 +39,7 @@ files:
39
39
  - Rakefile
40
40
  - VERSION
41
41
  - examples/example.rb
42
+ - examples/som_data.rb
42
43
  - lib/som.rb
43
44
  - lib/som/node.rb
44
45
  - som.gemspec
@@ -79,3 +80,4 @@ test_files:
79
80
  - spec/som_spec.rb
80
81
  - spec/spec_helper.rb
81
82
  - examples/example.rb
83
+ - examples/som_data.rb