som 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +24 -5
- data/VERSION +1 -1
- data/examples/example.rb +6 -3
- data/lib/som/node.rb +3 -2
- data/lib/som.rb +47 -26
- data/som.gemspec +2 -2
- data/spec/node_spec.rb +10 -6
- data/spec/som_spec.rb +27 -17
- metadata +2 -2
data/README.rdoc
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
= SOM - Self Organising Map
|
2
2
|
|
3
|
-
A pure Ruby implementation of the Self Organising Map machine learning
|
3
|
+
A pure Ruby implementation of the Self Organising Map machine learning algorithm.
|
4
4
|
|
5
5
|
== Install
|
6
6
|
|
7
|
-
gem sources -a
|
7
|
+
gem sources -a http://gemcutter.org
|
8
8
|
sudo gem install som
|
9
9
|
|
10
10
|
== How To Use
|
@@ -16,11 +16,30 @@ A pure Ruby implementation of the Self Organising Map machine learning Algorithm
|
|
16
16
|
|
17
17
|
a = SOM.new(data, :number_of_nodes => 4, :dimensions => 3)
|
18
18
|
a.train
|
19
|
+
|
20
|
+
# To see which class a new piece of data fits into
|
21
|
+
new_data = [9,8,7]
|
22
|
+
|
23
|
+
# An array is returned containing the index of the
|
24
|
+
# training data that fits into the same class
|
25
|
+
# The index is the same as the index in the training data e.g:
|
26
|
+
# data[index_returned_by_SOM (2)] == data[2]
|
27
|
+
a.classify(new_data)
|
28
|
+
#=> [node_index, [training_data_index_1, training_data_index_2...]]
|
19
29
|
|
20
|
-
# Returns the
|
30
|
+
# Returns the id of a node and the
|
31
|
+
# index of the data that belongs to it
|
21
32
|
a.inspect
|
22
|
-
#=> [[1, 0...], [99, 84...], [
|
33
|
+
#=> [[0, [1, 0...]], [1, [99, 84...]], [2, [11, 23...]]]
|
34
|
+
|
35
|
+
== Options
|
36
|
+
|
37
|
+
SOM.new(data, :number_of_nodes => 1, #Default: 5
|
38
|
+
:learning_rate => 0.7, #Default: 0.5
|
39
|
+
:radius => 1, #Default: number_of_nodes / 2
|
40
|
+
:max_iterations => 100, #Default: 100
|
41
|
+
:verbose => true) #Default: false
|
23
42
|
|
24
43
|
== Copyright
|
25
44
|
|
26
|
-
Copyright (c) 2009 Red Davis. See LICENSE for details.
|
45
|
+
Copyright (c) 2009 Red Davis. See LICENSE for details.
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.
|
1
|
+
0.0.3
|
data/examples/example.rb
CHANGED
@@ -5,6 +5,8 @@ require 'benchmark'
|
|
5
5
|
require 'rubygems'
|
6
6
|
require 'normalizer'
|
7
7
|
|
8
|
+
#SOM_DATA = Array.new(100) { Array.new(50) {rand}}
|
9
|
+
|
8
10
|
min, max = Normalizer.find_min_and_max(SOM_DATA)
|
9
11
|
|
10
12
|
normalizer = Normalizer.new(:min => min, :max => max)
|
@@ -15,9 +17,8 @@ SOM_DATA.each do |n|
|
|
15
17
|
data << normalizer.normalize(n)
|
16
18
|
end
|
17
19
|
|
18
|
-
a = SOM.new(data, :nodes =>
|
19
|
-
|
20
|
-
puts a.nodes.inspect
|
20
|
+
a = SOM.new(data, :nodes => 8,
|
21
|
+
:radius => 0.8)
|
21
22
|
|
22
23
|
puts a.global_distance_error
|
23
24
|
|
@@ -27,4 +28,6 @@ end
|
|
27
28
|
|
28
29
|
puts a.global_distance_error
|
29
30
|
|
31
|
+
#puts a.nodes.inspect
|
32
|
+
|
30
33
|
puts times
|
data/lib/som/node.rb
CHANGED
data/lib/som.rb
CHANGED
@@ -4,14 +4,18 @@ class SOM
|
|
4
4
|
|
5
5
|
def initialize(training_data, options={})
|
6
6
|
@training_data = training_data
|
7
|
+
@dimensions = training_data[0].size
|
8
|
+
@iteration_count = 1
|
9
|
+
|
10
|
+
# Options
|
7
11
|
@number_of_nodes = options[:nodes] || 5
|
8
|
-
@dimensions = options[:dimensions]
|
9
12
|
@learning_rate = options[:learning_rate] || 0.5
|
10
13
|
@radius = options[:radius] || @number_of_nodes / 2
|
11
|
-
@iteration_count = 1
|
12
14
|
@max_iterations = options[:max_iterations] || 100
|
15
|
+
|
13
16
|
# TODO: Allow a lambda so we can use different neighborhood functions
|
14
|
-
@neighborhood_function = options[:neighborhood_function] || 1
|
17
|
+
@neighborhood_function = 1 #options[:neighborhood_function] || 1
|
18
|
+
@verbose = options[:verbose]
|
15
19
|
|
16
20
|
create_nodes(training_data)
|
17
21
|
end
|
@@ -28,25 +32,25 @@ class SOM
|
|
28
32
|
place_data_into_buckets(@training_data)
|
29
33
|
end
|
30
34
|
|
31
|
-
# Returns an array of buckets containing the index of the data
|
35
|
+
# Returns an array of buckets containing the index of the training data
|
32
36
|
def inspect
|
33
|
-
nodes.map {|x| x.
|
37
|
+
nodes.map {|x| [x.id, x.bucket] }
|
34
38
|
end
|
35
39
|
|
36
|
-
# Return data from node that is closest to data
|
37
|
-
# You are returned
|
38
|
-
# [
|
39
|
-
# The index is the original index of that that was pumped into the
|
40
|
+
# Return training data from the node that is closest to input data
|
41
|
+
# You are returned an array that looks like:
|
42
|
+
# [node_id, [training_data_index_1, training_data_index_2...]]
|
43
|
+
# The index is the original index of the data that was pumped into the SOM
|
40
44
|
# during the training process
|
41
45
|
def classify(data)
|
42
46
|
closest_node = find_closest_node(data)
|
43
|
-
closest_node.bucket
|
47
|
+
[closest_node.id, closest_node.bucket]
|
44
48
|
end
|
45
49
|
|
46
50
|
# Taken from AI4R SOM library #107
|
47
51
|
def global_distance_error
|
48
52
|
@training_data.inject(0) do |sum, n|
|
49
|
-
sum +
|
53
|
+
sum + find_closest_node_with_distance(n)[1]
|
50
54
|
end
|
51
55
|
end
|
52
56
|
|
@@ -54,17 +58,25 @@ class SOM
|
|
54
58
|
|
55
59
|
def train_it!(data)
|
56
60
|
return false if @iteration_count >= @max_iterations
|
61
|
+
|
62
|
+
print_message("Iteration: #{@iteration_count}")
|
57
63
|
|
58
|
-
data.
|
64
|
+
data.each_with_index do |input, index|
|
65
|
+
print_message("\tLooking at data #{index+1}/#{data.size}")
|
66
|
+
|
59
67
|
# Update closest node
|
68
|
+
print_message("\t\tUpdating closest node")
|
69
|
+
|
60
70
|
closest_node = find_closest_node(input)
|
61
71
|
closest_node.update_weight(@learning_rate, input)
|
62
72
|
|
63
73
|
# Update nodes that are closer than the radius
|
64
74
|
other_nodes = nodes - [closest_node]
|
65
|
-
other_nodes.
|
66
|
-
next if
|
67
|
-
|
75
|
+
other_nodes.each_with_index do |node, index|
|
76
|
+
next if node.distance_from(closest_node.weights) > decayed_radius
|
77
|
+
|
78
|
+
print_message("\t\tUpdating other nodes: #{index+1}/#{other_nodes.size}")
|
79
|
+
|
68
80
|
node.update_weight(@learning_rate, input, neighborhood_function)
|
69
81
|
end
|
70
82
|
end
|
@@ -72,34 +84,37 @@ class SOM
|
|
72
84
|
increase_iteration_count!
|
73
85
|
end
|
74
86
|
|
87
|
+
# This places the training data into its closest node's bucket.
|
75
88
|
def place_data_into_buckets(data)
|
76
89
|
data.each_with_index do |input, index|
|
77
90
|
closest_node = find_closest_node(input)
|
78
|
-
closest_node <<
|
91
|
+
closest_node << index
|
79
92
|
end
|
80
93
|
end
|
81
94
|
|
82
95
|
def decayed_radius
|
83
|
-
@radius - (0.
|
96
|
+
@radius - (0.7 * @radius * @iteration_count / @max_iterations)
|
84
97
|
end
|
85
98
|
|
86
99
|
def decayed_learning_rate
|
87
|
-
@learning_rate - (0.
|
100
|
+
@learning_rate - (0.7 * @learning_rate * @iteration_count / @max_iterations)
|
88
101
|
end
|
89
102
|
|
90
|
-
def increase_iteration_count!
|
91
|
-
@iteration_count += 1
|
92
|
-
end
|
93
|
-
|
94
103
|
def neighborhood_function
|
95
104
|
0.5 * @neighborhood_function * @iteration_count / @max_iterations
|
96
105
|
end
|
97
106
|
|
107
|
+
def increase_iteration_count!
|
108
|
+
@iteration_count += 1
|
109
|
+
end
|
110
|
+
|
98
111
|
def find_closest_node(data)
|
99
|
-
|
112
|
+
find_closest_node_with_distance(data)[0]
|
100
113
|
end
|
101
114
|
|
102
|
-
|
115
|
+
# Finds the closest node to some data and returns the node
|
116
|
+
# and its distance from the data => [node, distance]
|
117
|
+
def find_closest_node_with_distance(data)
|
103
118
|
closest_node = [nodes[0], nodes[0].distance_from(data)]
|
104
119
|
|
105
120
|
nodes[1..-1].each do |node|
|
@@ -110,9 +125,15 @@ class SOM
|
|
110
125
|
end
|
111
126
|
closest_node
|
112
127
|
end
|
113
|
-
|
128
|
+
|
114
129
|
def create_nodes(data)
|
115
|
-
@number_of_nodes.times
|
130
|
+
@number_of_nodes.times do |n|
|
131
|
+
nodes << Node.new(n, @dimensions)
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
def print_message(message)
|
136
|
+
puts message if @verbose == true
|
116
137
|
end
|
117
138
|
|
118
139
|
end
|
data/som.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{som}
|
8
|
-
s.version = "0.0.
|
8
|
+
s.version = "0.0.3"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["reddavis"]
|
12
|
-
s.date = %q{2009-11-
|
12
|
+
s.date = %q{2009-11-30}
|
13
13
|
s.description = %q{A Self Organising Map}
|
14
14
|
s.email = %q{reddavis@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
data/spec/node_spec.rb
CHANGED
@@ -3,18 +3,22 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
|
3
3
|
describe "Node" do
|
4
4
|
describe "Initialization" do
|
5
5
|
before do
|
6
|
-
@a = Node.new(5)
|
6
|
+
@a = Node.new(1, 5)
|
7
7
|
end
|
8
8
|
|
9
9
|
it "should have 5 weights" do
|
10
10
|
@a.weights.size.should == 5
|
11
11
|
end
|
12
|
+
|
13
|
+
it "should have an ID" do
|
14
|
+
@a.id.should == 1
|
15
|
+
end
|
12
16
|
end
|
13
17
|
|
14
18
|
describe "Distance Calculation" do
|
15
19
|
before do
|
16
|
-
@a = Node.new(2)
|
17
|
-
@b = Node.new(2)
|
20
|
+
@a = Node.new(1, 2)
|
21
|
+
@b = Node.new(1, 2)
|
18
22
|
end
|
19
23
|
|
20
24
|
it "should return 0" do
|
@@ -37,7 +41,7 @@ describe "Node" do
|
|
37
41
|
describe "Update Weight" do
|
38
42
|
describe "Closest" do
|
39
43
|
before do
|
40
|
-
@a = Node.new(2)
|
44
|
+
@a = Node.new(1, 2)
|
41
45
|
@data = [1,2]
|
42
46
|
end
|
43
47
|
|
@@ -50,7 +54,7 @@ describe "Node" do
|
|
50
54
|
|
51
55
|
describe "Neighbor" do
|
52
56
|
before do
|
53
|
-
@a = Node.new(2)
|
57
|
+
@a = Node.new(1, 2)
|
54
58
|
@data = [1,2]
|
55
59
|
end
|
56
60
|
|
@@ -64,7 +68,7 @@ describe "Node" do
|
|
64
68
|
|
65
69
|
describe "Bucket" do
|
66
70
|
before do
|
67
|
-
@a = Node.new(2)
|
71
|
+
@a = Node.new(1, 2)
|
68
72
|
end
|
69
73
|
|
70
74
|
it "should put data into the nodes bucket" do
|
data/spec/som_spec.rb
CHANGED
@@ -3,8 +3,8 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
|
3
3
|
describe "Som" do
|
4
4
|
describe "Training" do
|
5
5
|
before do
|
6
|
-
@data = [[0,0]
|
7
|
-
@a = SOM.new(@data, :nodes => 1
|
6
|
+
@data = [[0,0]]
|
7
|
+
@a = SOM.new(@data, :nodes => 1)
|
8
8
|
end
|
9
9
|
|
10
10
|
it "should change the weight of the best matching node" do
|
@@ -22,31 +22,32 @@ describe "Som" do
|
|
22
22
|
|
23
23
|
it "should preserve data indexes" do
|
24
24
|
@a.train
|
25
|
-
|
26
|
-
index_returned = @a.nodes[0].bucket[0]
|
27
|
-
|
28
|
-
|
29
|
-
@data[index_returned].should == data_returned
|
25
|
+
|
26
|
+
index_returned = @a.nodes[0].bucket[0]
|
27
|
+
index_returned.should == 0
|
30
28
|
end
|
31
29
|
end
|
32
30
|
|
33
31
|
describe "Inspect" do
|
34
32
|
before do
|
35
33
|
data = [[2,3]]
|
36
|
-
@a = SOM.new(data, :nodes => 1
|
34
|
+
@a = SOM.new(data, :nodes => 1)
|
35
|
+
@a.train
|
36
|
+
end
|
37
|
+
|
38
|
+
it "should return the id of the nodes" do
|
39
|
+
@a.inspect[0][0].should == 0
|
37
40
|
end
|
38
41
|
|
39
42
|
it "should show the clusters of data indexes" do
|
40
|
-
@a.
|
41
|
-
@a.inspect.should be_an(Array)
|
42
|
-
@a.inspect.size.should == 1
|
43
|
+
@a.inspect[0][1].should be_an(Array)
|
43
44
|
end
|
44
45
|
end
|
45
46
|
|
46
47
|
describe "Clustering" do
|
47
48
|
before do
|
48
49
|
data = [[0,0], [999,999]]
|
49
|
-
@a = SOM.new(data, :nodes => 2
|
50
|
+
@a = SOM.new(data, :nodes => 2)
|
50
51
|
end
|
51
52
|
|
52
53
|
it "should belong to 2 seperate nodes" do
|
@@ -59,20 +60,29 @@ describe "Som" do
|
|
59
60
|
describe "Classify" do
|
60
61
|
before do
|
61
62
|
data = [[0,0], [999,999]]
|
62
|
-
|
63
|
+
a = SOM.new(data, :nodes => 1)
|
64
|
+
a.train
|
65
|
+
@a = a.classify([1,1])
|
63
66
|
end
|
64
67
|
|
65
68
|
it "should belong to 2 seperate nodes" do
|
66
|
-
@a.
|
67
|
-
@a.
|
68
|
-
|
69
|
+
@a.should be_an(Array)
|
70
|
+
@a.size.should == 2
|
71
|
+
end
|
72
|
+
|
73
|
+
it "should return a node id" do
|
74
|
+
@a[0].should == 0
|
75
|
+
end
|
76
|
+
|
77
|
+
it "should return an array of training_data ids" do
|
78
|
+
@a[1].should be_an(Array)
|
69
79
|
end
|
70
80
|
end
|
71
81
|
|
72
82
|
describe "Global Distance Error" do
|
73
83
|
before do
|
74
84
|
data = [[0,0], [999,999]]
|
75
|
-
@a = SOM.new(data, :nodes => 2
|
85
|
+
@a = SOM.new(data, :nodes => 2)
|
76
86
|
end
|
77
87
|
|
78
88
|
it "should return an integer" do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: som
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- reddavis
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-11-
|
12
|
+
date: 2009-11-30 00:00:00 +00:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|