som 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -12,8 +12,10 @@ A pure Ruby implementation of the Self Organising Map machine learning Algorithm
12
12
  require 'rubygems'
13
13
  require 'som'
14
14
 
15
- a = SOM.new(:number_of_nodes => 4, :dimensions => 3)
16
- a.train(data)
15
+ data = [[1,2,3], [4,5,6]...]
16
+
17
+ a = SOM.new(data, :nodes => 4, :dimensions => 3)
18
+ a.train
17
19
 
18
20
  # Returns the index of the data you gave it
19
21
  a.inspect
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.1
1
+ 0.0.2
data/examples/example.rb CHANGED
@@ -1,10 +1,30 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/som_data')
1
2
  require File.expand_path(File.dirname(__FILE__) + '/../lib/som')
3
+ require 'benchmark'
2
4
 
3
- data = Array.new(100) {Array.new(3) {rand}}
5
+ require 'rubygems'
6
+ require 'normalizer'
4
7
 
5
- a = SOM.new(:number_of_nodes => 4, :dimensions => 3)
8
+ min, max = Normalizer.find_min_and_max(SOM_DATA)
6
9
 
7
- a.train(data)
10
+ normalizer = Normalizer.new(:min => min, :max => max)
8
11
 
9
- # Returns the index of the data you gave it
10
- puts a.inspect.inspect
12
+ data = []
13
+
14
+ SOM_DATA.each do |n|
15
+ data << normalizer.normalize(n)
16
+ end
17
+
18
+ a = SOM.new(data, :nodes => 10, :dimensions => data[0].size)
19
+
20
+ puts a.nodes.inspect
21
+
22
+ puts a.global_distance_error
23
+
24
+ times = Benchmark.measure do
25
+ a.train
26
+ end
27
+
28
+ puts a.global_distance_error
29
+
30
+ puts times
@@ -0,0 +1,158 @@
1
+ # Taken from AI4R Gem
2
+
3
+ # data is from the iris dataset (http://archive.ics.uci.edu/ml/datasets/Iris)
4
+ # it is the full dataset, removing the last column
5
+ # website provides additional information on the dataset itself (attributes, class distribution, etc)
6
+
7
+ SOM_DATA = [
8
+ [5.1, 3.5, 1.4, 0.2],
9
+ [4.9, 3.0, 1.4, 0.2],
10
+ [4.7, 3.2, 1.3, 0.2],
11
+ [4.6, 3.1, 1.5, 0.2],
12
+ [5.0, 3.6, 1.4, 0.2],
13
+ [5.4, 3.9, 1.7, 0.4],
14
+ [4.6, 3.4, 1.4, 0.3],
15
+ [5.0, 3.4, 1.5, 0.2],
16
+ [4.4, 2.9, 1.4, 0.2],
17
+ [4.9, 3.1, 1.5, 0.1],
18
+ [5.4, 3.7, 1.5, 0.2],
19
+ [4.8, 3.4, 1.6, 0.2],
20
+ [4.8, 3.0, 1.4, 0.1],
21
+ [4.3, 3.0, 1.1, 0.1],
22
+ [5.8, 4.0, 1.2, 0.2],
23
+ [5.7, 4.4, 1.5, 0.4],
24
+ [5.4, 3.9, 1.3, 0.4],
25
+ [5.1, 3.5, 1.4, 0.3],
26
+ [5.7, 3.8, 1.7, 0.3],
27
+ [5.1, 3.8, 1.5, 0.3],
28
+ [5.4, 3.4, 1.7, 0.2],
29
+ [5.1, 3.7, 1.5, 0.4],
30
+ [4.6, 3.6, 1.0, 0.2],
31
+ [5.1, 3.3, 1.7, 0.5],
32
+ [4.8, 3.4, 1.9, 0.2],
33
+ [5.0, 3.0, 1.6, 0.2],
34
+ [5.0, 3.4, 1.6, 0.4],
35
+ [5.2, 3.5, 1.5, 0.2],
36
+ [5.2, 3.4, 1.4, 0.2],
37
+ [4.7, 3.2, 1.6, 0.2],
38
+ [4.8, 3.1, 1.6, 0.2],
39
+ [5.4, 3.4, 1.5, 0.4],
40
+ [5.2, 4.1, 1.5, 0.1],
41
+ [5.5, 4.2, 1.4, 0.2],
42
+ [4.9, 3.1, 1.5, 0.1],
43
+ [5.0, 3.2, 1.2, 0.2],
44
+ [5.5, 3.5, 1.3, 0.2],
45
+ [4.9, 3.1, 1.5, 0.1],
46
+ [4.4, 3.0, 1.3, 0.2],
47
+ [5.1, 3.4, 1.5, 0.2],
48
+ [5.0, 3.5, 1.3, 0.3],
49
+ [4.5, 2.3, 1.3, 0.3],
50
+ [4.4, 3.2, 1.3, 0.2],
51
+ [5.0, 3.5, 1.6, 0.6],
52
+ [5.1, 3.8, 1.9, 0.4],
53
+ [4.8, 3.0, 1.4, 0.3],
54
+ [5.1, 3.8, 1.6, 0.2],
55
+ [4.6, 3.2, 1.4, 0.2],
56
+ [5.3, 3.7, 1.5, 0.2],
57
+ [5.0, 3.3, 1.4, 0.2],
58
+ [7.0, 3.2, 4.7, 1.4],
59
+ [6.4, 3.2, 4.5, 1.5],
60
+ [6.9, 3.1, 4.9, 1.5],
61
+ [5.5, 2.3, 4.0, 1.3],
62
+ [6.5, 2.8, 4.6, 1.5],
63
+ [5.7, 2.8, 4.5, 1.3],
64
+ [6.3, 3.3, 4.7, 1.6],
65
+ [4.9, 2.4, 3.3, 1.0],
66
+ [6.6, 2.9, 4.6, 1.3],
67
+ [5.2, 2.7, 3.9, 1.4],
68
+ [5.0, 2.0, 3.5, 1.0],
69
+ [5.9, 3.0, 4.2, 1.5],
70
+ [6.0, 2.2, 4.0, 1.0],
71
+ [6.1, 2.9, 4.7, 1.4],
72
+ [5.6, 2.9, 3.6, 1.3],
73
+ [6.7, 3.1, 4.4, 1.4],
74
+ [5.6, 3.0, 4.5, 1.5],
75
+ [5.8, 2.7, 4.1, 1.0],
76
+ [6.2, 2.2, 4.5, 1.5],
77
+ [5.6, 2.5, 3.9, 1.1],
78
+ [5.9, 3.2, 4.8, 1.8],
79
+ [6.1, 2.8, 4.0, 1.3],
80
+ [6.3, 2.5, 4.9, 1.5],
81
+ [6.1, 2.8, 4.7, 1.2],
82
+ [6.4, 2.9, 4.3, 1.3],
83
+ [6.6, 3.0, 4.4, 1.4],
84
+ [6.8, 2.8, 4.8, 1.4],
85
+ [6.7, 3.0, 5.0, 1.7],
86
+ [6.0, 2.9, 4.5, 1.5],
87
+ [5.7, 2.6, 3.5, 1.0],
88
+ [5.5, 2.4, 3.8, 1.1],
89
+ [5.5, 2.4, 3.7, 1.0],
90
+ [5.8, 2.7, 3.9, 1.2],
91
+ [6.0, 2.7, 5.1, 1.6],
92
+ [5.4, 3.0, 4.5, 1.5],
93
+ [6.0, 3.4, 4.5, 1.6],
94
+ [6.7, 3.1, 4.7, 1.5],
95
+ [6.3, 2.3, 4.4, 1.3],
96
+ [5.6, 3.0, 4.1, 1.3],
97
+ [5.5, 2.5, 4.0, 1.3],
98
+ [5.5, 2.6, 4.4, 1.2],
99
+ [6.1, 3.0, 4.6, 1.4],
100
+ [5.8, 2.6, 4.0, 1.2],
101
+ [5.0, 2.3, 3.3, 1.0],
102
+ [5.6, 2.7, 4.2, 1.3],
103
+ [5.7, 3.0, 4.2, 1.2],
104
+ [5.7, 2.9, 4.2, 1.3],
105
+ [6.2, 2.9, 4.3, 1.3],
106
+ [5.1, 2.5, 3.0, 1.1],
107
+ [5.7, 2.8, 4.1, 1.3],
108
+ [6.3, 3.3, 6.0, 2.5],
109
+ [5.8, 2.7, 5.1, 1.9],
110
+ [7.1, 3.0, 5.9, 2.1],
111
+ [6.3, 2.9, 5.6, 1.8],
112
+ [6.5, 3.0, 5.8, 2.2],
113
+ [7.6, 3.0, 6.6, 2.1],
114
+ [4.9, 2.5, 4.5, 1.7],
115
+ [7.3, 2.9, 6.3, 1.8],
116
+ [6.7, 2.5, 5.8, 1.8],
117
+ [7.2, 3.6, 6.1, 2.5],
118
+ [6.5, 3.2, 5.1, 2.0],
119
+ [6.4, 2.7, 5.3, 1.9],
120
+ [6.8, 3.0, 5.5, 2.1],
121
+ [5.7, 2.5, 5.0, 2.0],
122
+ [5.8, 2.8, 5.1, 2.4],
123
+ [6.4, 3.2, 5.3, 2.3],
124
+ [6.5, 3.0, 5.5, 1.8],
125
+ [7.7, 3.8, 6.7, 2.2],
126
+ [7.7, 2.6, 6.9, 2.3],
127
+ [6.0, 2.2, 5.0, 1.5],
128
+ [6.9, 3.2, 5.7, 2.3],
129
+ [5.6, 2.8, 4.9, 2.0],
130
+ [7.7, 2.8, 6.7, 2.0],
131
+ [6.3, 2.7, 4.9, 1.8],
132
+ [6.7, 3.3, 5.7, 2.1],
133
+ [7.2, 3.2, 6.0, 1.8],
134
+ [6.2, 2.8, 4.8, 1.8],
135
+ [6.1, 3.0, 4.9, 1.8],
136
+ [6.4, 2.8, 5.6, 2.1],
137
+ [7.2, 3.0, 5.8, 1.6],
138
+ [7.4, 2.8, 6.1, 1.9],
139
+ [7.9, 3.8, 6.4, 2.0],
140
+ [6.4, 2.8, 5.6, 2.2],
141
+ [6.3, 2.8, 5.1, 1.5],
142
+ [6.1, 2.6, 5.6, 1.4],
143
+ [7.7, 3.0, 6.1, 2.3],
144
+ [6.3, 3.4, 5.6, 2.4],
145
+ [6.4, 3.1, 5.5, 1.8],
146
+ [6.0, 3.0, 4.8, 1.8],
147
+ [6.9, 3.1, 5.4, 2.1],
148
+ [6.7, 3.1, 5.6, 2.4],
149
+ [6.9, 3.1, 5.1, 2.3],
150
+ [5.8, 2.7, 5.1, 1.9],
151
+ [6.8, 3.2, 5.9, 2.3],
152
+ [6.7, 3.3, 5.7, 2.5],
153
+ [6.7, 3.0, 5.2, 2.3],
154
+ [6.3, 2.5, 5.0, 1.9],
155
+ [6.5, 3.0, 5.2, 2.0],
156
+ [6.2, 3.4, 5.4, 2.3],
157
+ [5.9, 3.0, 5.1, 1.8],
158
+ ]
data/lib/som/node.rb CHANGED
@@ -34,8 +34,8 @@ class Node
34
34
  private
35
35
 
36
36
  def create_weights(number_of_weights)
37
- number_of_weights.times do
38
- weights << (rand > 0.5 ? -rand : rand)
37
+ number_of_weights.times do |n|
38
+ weights << rand
39
39
  end
40
40
  end
41
41
 
data/lib/som.rb CHANGED
@@ -2,28 +2,30 @@ require File.expand_path(File.dirname(__FILE__) + '/som/node')
2
2
 
3
3
  class SOM
4
4
 
5
- def initialize(options={})
5
+ def initialize(training_data, options={})
6
+ @training_data = training_data
6
7
  @number_of_nodes = options[:nodes] || 5
7
8
  @dimensions = options[:dimensions]
8
9
  @learning_rate = options[:learning_rate] || 0.5
9
10
  @radius = options[:radius] || @number_of_nodes / 2
10
- @iteration_count = 0
11
- @max_iterations = options[:max_iterations] || 500
11
+ @iteration_count = 1
12
+ @max_iterations = options[:max_iterations] || 100
12
13
  # TODO: Allow a lambda so we can use different neighborhood functions
13
14
  @neighborhood_function = options[:neighborhood_function] || 1
14
- create_nodes
15
+
16
+ create_nodes(training_data)
15
17
  end
16
18
 
17
19
  def nodes
18
20
  @nodes ||= []
19
21
  end
20
22
 
21
- def train(data)
22
- while train_it!(data)
23
+ def train
24
+ while train_it!(@training_data)
23
25
  end
24
26
  # Place the data in the nodes buckets so we can see how
25
27
  # The data has been clustered
26
- place_data_into_buckets(data)
28
+ place_data_into_buckets(@training_data)
27
29
  end
28
30
 
29
31
  # Returns an array of buckets containing the index of the data given
@@ -41,11 +43,18 @@ class SOM
41
43
  closest_node.bucket
42
44
  end
43
45
 
46
+ # Taken from AI4R SOM library #107
47
+ def global_distance_error
48
+ @training_data.inject(0) do |sum, n|
49
+ sum + find_closest(n)[1]
50
+ end
51
+ end
52
+
44
53
  private
45
54
 
46
55
  def train_it!(data)
47
56
  return false if @iteration_count >= @max_iterations
48
-
57
+
49
58
  data.each do |input|
50
59
  # Update closest node
51
60
  closest_node = find_closest_node(input)
@@ -54,14 +63,12 @@ class SOM
54
63
  # Update nodes that are closer than the radius
55
64
  other_nodes = nodes - [closest_node]
56
65
  other_nodes.each do |node|
57
- next if @radius > node.distance_from(closest_node.weights)
66
+ next if decayed_radius > node.distance_from(closest_node.weights)
58
67
 
59
68
  node.update_weight(@learning_rate, input, neighborhood_function)
60
69
  end
61
70
  end
62
71
 
63
- decrease_radius!
64
- decrease_learning_rate!
65
72
  increase_iteration_count!
66
73
  end
67
74
 
@@ -72,14 +79,14 @@ class SOM
72
79
  end
73
80
  end
74
81
 
75
- def decrease_radius!
76
- @radius = 0.5 * @radius * @iteration_count / @max_iterations
82
+ def decayed_radius
83
+ @radius - (0.1 * @radius * @iteration_count / @max_iterations)
77
84
  end
78
85
 
79
- def decrease_learning_rate!
80
- @learning_rate = 0.5 * @learning_rate * @iteration_count / @max_iterations
86
+ def decayed_learning_rate
87
+ @learning_rate - (0.5 * @learning_rate * @iteration_count / @max_iterations)
81
88
  end
82
-
89
+
83
90
  def increase_iteration_count!
84
91
  @iteration_count += 1
85
92
  end
@@ -89,6 +96,10 @@ class SOM
89
96
  end
90
97
 
91
98
  def find_closest_node(data)
99
+ find_closest(data)[0]
100
+ end
101
+
102
+ def find_closest(data)
92
103
  closest_node = [nodes[0], nodes[0].distance_from(data)]
93
104
 
94
105
  nodes[1..-1].each do |node|
@@ -97,10 +108,10 @@ class SOM
97
108
  closest_node = [node, distance]
98
109
  end
99
110
  end
100
- closest_node[0]
111
+ closest_node
101
112
  end
102
-
103
- def create_nodes
113
+
114
+ def create_nodes(data)
104
115
  @number_of_nodes.times { nodes << Node.new(@dimensions) }
105
116
  end
106
117
 
data/som.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{som}
8
- s.version = "0.0.1"
8
+ s.version = "0.0.2"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["reddavis"]
12
- s.date = %q{2009-11-26}
12
+ s.date = %q{2009-11-29}
13
13
  s.description = %q{A Self Organising Map}
14
14
  s.email = %q{reddavis@gmail.com}
15
15
  s.extra_rdoc_files = [
@@ -24,6 +24,7 @@ Gem::Specification.new do |s|
24
24
  "Rakefile",
25
25
  "VERSION",
26
26
  "examples/example.rb",
27
+ "examples/som_data.rb",
27
28
  "lib/som.rb",
28
29
  "lib/som/node.rb",
29
30
  "som.gemspec",
@@ -41,7 +42,8 @@ Gem::Specification.new do |s|
41
42
  "spec/node_spec.rb",
42
43
  "spec/som_spec.rb",
43
44
  "spec/spec_helper.rb",
44
- "examples/example.rb"
45
+ "examples/example.rb",
46
+ "examples/som_data.rb"
45
47
  ]
46
48
 
47
49
  if s.respond_to? :specification_version then
data/spec/som_spec.rb CHANGED
@@ -1,54 +1,43 @@
1
1
  require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
2
 
3
- describe "Som" do
4
- describe "Initialization" do
5
- before do
6
- @a = SOM.new(:nodes => 10, :dimensions => 5)
7
- end
8
-
9
- it "should have 10 nodes" do
10
- @a.nodes.size.should == 10
11
- end
12
- end
13
-
3
+ describe "Som" do
14
4
  describe "Training" do
15
5
  before do
16
- @a = SOM.new(:nodes => 1, :dimensions => 2)
17
- @data = [[2,3]]
6
+ @data = [[0,0], [0,0.5], [2,4], [6,5]]
7
+ @a = SOM.new(@data, :nodes => 1, :dimensions => 2)
18
8
  end
19
9
 
20
10
  it "should change the weight of the best matching node" do
21
11
  before = @a.nodes.map {|x| x.weights.clone}
22
- @a.train(@data)
12
+ @a.train
23
13
  after = @a.nodes.map {|x| x.weights}
24
14
 
25
15
  before.should_not == after
26
16
  end
27
17
 
28
- it "should will the nodes bucket with the data" do
29
- @a.train(@data)
18
+ it "should fill the nodes bucket with the data" do
19
+ @a.train
30
20
  @a.nodes[0].bucket.should_not be_empty
31
21
  end
32
22
 
33
23
  it "should preserve data indexes" do
34
- data = [[0,0], [0,0.5], [2,4], [6,5]]
35
- @a.train(data)
24
+ @a.train
36
25
 
37
26
  index_returned = @a.nodes[0].bucket[0][0]
38
27
  data_returned = @a.nodes[0].bucket[0][1]
39
28
 
40
- data[index_returned].should == data_returned
29
+ @data[index_returned].should == data_returned
41
30
  end
42
31
  end
43
32
 
44
33
  describe "Inspect" do
45
34
  before do
46
- @a = SOM.new(:nodes => 1, :dimensions => 2)
47
- @data = [[2,3]]
35
+ data = [[2,3]]
36
+ @a = SOM.new(data, :nodes => 1, :dimensions => 2)
48
37
  end
49
38
 
50
39
  it "should show the clusters of data indexes" do
51
- @a.train(@data)
40
+ @a.train
52
41
  @a.inspect.should be_an(Array)
53
42
  @a.inspect.size.should == 1
54
43
  end
@@ -56,12 +45,12 @@ describe "Som" do
56
45
 
57
46
  describe "Clustering" do
58
47
  before do
59
- @a = SOM.new(:nodes => 2, :dimensions => 2)
48
+ data = [[0,0], [999,999]]
49
+ @a = SOM.new(data, :nodes => 2, :dimensions => 2)
60
50
  end
61
51
 
62
52
  it "should belong to 2 seperate nodes" do
63
- data = [[0,0], [999,999]]
64
- @a.train(data)
53
+ @a.train
65
54
  @a.inspect[0].should_not be_empty
66
55
  @a.inspect[1].should_not be_empty
67
56
  end
@@ -69,14 +58,25 @@ describe "Som" do
69
58
 
70
59
  describe "Classify" do
71
60
  before do
72
- @a = SOM.new(:nodes => 2, :dimensions => 2)
61
+ data = [[0,0], [999,999]]
62
+ @a = SOM.new(data, :nodes => 2, :dimensions => 2)
73
63
  end
74
64
 
75
65
  it "should belong to 2 seperate nodes" do
76
- data = [[0,0], [999,999]]
77
- @a.train(data)
66
+ @a.train
78
67
  @a.classify([1,1]).should be_an(Array)
79
68
  @a.classify([1,1]).size.should == 1
80
69
  end
81
70
  end
71
+
72
+ describe "Global Distance Error" do
73
+ before do
74
+ data = [[0,0], [999,999]]
75
+ @a = SOM.new(data, :nodes => 2, :dimensions => 2)
76
+ end
77
+
78
+ it "should return an integer" do
79
+ @a.global_distance_error.should be_a(Float)
80
+ end
81
+ end
82
82
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: som
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - reddavis
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-11-26 00:00:00 +00:00
12
+ date: 2009-11-29 00:00:00 +00:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -39,6 +39,7 @@ files:
39
39
  - Rakefile
40
40
  - VERSION
41
41
  - examples/example.rb
42
+ - examples/som_data.rb
42
43
  - lib/som.rb
43
44
  - lib/som/node.rb
44
45
  - som.gemspec
@@ -79,3 +80,4 @@ test_files:
79
80
  - spec/som_spec.rb
80
81
  - spec/spec_helper.rb
81
82
  - examples/example.rb
83
+ - examples/som_data.rb