som 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +24 -5
- data/VERSION +1 -1
- data/examples/example.rb +6 -3
- data/lib/som/node.rb +3 -2
- data/lib/som.rb +47 -26
- data/som.gemspec +2 -2
- data/spec/node_spec.rb +10 -6
- data/spec/som_spec.rb +27 -17
- metadata +2 -2
data/README.rdoc
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
= SOM - Self Organising Map
|
2
2
|
|
3
|
-
A pure Ruby implementation of the Self Organising Map machine learning
|
3
|
+
A pure Ruby implementation of the Self Organising Map machine learning algorithm.
|
4
4
|
|
5
5
|
== Install
|
6
6
|
|
7
|
-
gem sources -a
|
7
|
+
gem sources -a http://gemcutter.org
|
8
8
|
sudo gem install som
|
9
9
|
|
10
10
|
== How To Use
|
@@ -16,11 +16,30 @@ A pure Ruby implementation of the Self Organising Map machine learning Algorithm
|
|
16
16
|
|
17
17
|
a = SOM.new(data, :number_of_nodes => 4, :dimensions => 3)
|
18
18
|
a.train
|
19
|
+
|
20
|
+
# To see which class a new piece of data fits into
|
21
|
+
new_data = [9,8,7]
|
22
|
+
|
23
|
+
# An array is returned containing the index of the
|
24
|
+
# training data that fits into the same class
|
25
|
+
# The index is the same as the index in the training data e.g:
|
26
|
+
# data[index_returned_by_SOM (2)] == data[2]
|
27
|
+
a.classify(new_data)
|
28
|
+
#=> [node_index, [training_data_index_1, training_data_index_2...]]
|
19
29
|
|
20
|
-
# Returns the
|
30
|
+
# Returns the id of a node and the
|
31
|
+
# index of the data that belongs to it
|
21
32
|
a.inspect
|
22
|
-
#=> [[1, 0...], [99, 84...], [
|
33
|
+
#=> [[0, [1, 0...]], [1, [99, 84...]], [2, [11, 23...]]]
|
34
|
+
|
35
|
+
== Options
|
36
|
+
|
37
|
+
SOM.new(data, :number_of_nodes => 1, #Default: 5
|
38
|
+
:learning_rate => 0.7, #Default: 0.5
|
39
|
+
:radius => 1, #Default: number_of_nodes / 2
|
40
|
+
:max_iterations => 100, #Default: 100
|
41
|
+
:verbose => true) #Default: false
|
23
42
|
|
24
43
|
== Copyright
|
25
44
|
|
26
|
-
Copyright (c) 2009 Red Davis. See LICENSE for details.
|
45
|
+
Copyright (c) 2009 Red Davis. See LICENSE for details.
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.
|
1
|
+
0.0.3
|
data/examples/example.rb
CHANGED
@@ -5,6 +5,8 @@ require 'benchmark'
|
|
5
5
|
require 'rubygems'
|
6
6
|
require 'normalizer'
|
7
7
|
|
8
|
+
#SOM_DATA = Array.new(100) { Array.new(50) {rand}}
|
9
|
+
|
8
10
|
min, max = Normalizer.find_min_and_max(SOM_DATA)
|
9
11
|
|
10
12
|
normalizer = Normalizer.new(:min => min, :max => max)
|
@@ -15,9 +17,8 @@ SOM_DATA.each do |n|
|
|
15
17
|
data << normalizer.normalize(n)
|
16
18
|
end
|
17
19
|
|
18
|
-
a = SOM.new(data, :nodes =>
|
19
|
-
|
20
|
-
puts a.nodes.inspect
|
20
|
+
a = SOM.new(data, :nodes => 8,
|
21
|
+
:radius => 0.8)
|
21
22
|
|
22
23
|
puts a.global_distance_error
|
23
24
|
|
@@ -27,4 +28,6 @@ end
|
|
27
28
|
|
28
29
|
puts a.global_distance_error
|
29
30
|
|
31
|
+
#puts a.nodes.inspect
|
32
|
+
|
30
33
|
puts times
|
data/lib/som/node.rb
CHANGED
data/lib/som.rb
CHANGED
@@ -4,14 +4,18 @@ class SOM
|
|
4
4
|
|
5
5
|
def initialize(training_data, options={})
|
6
6
|
@training_data = training_data
|
7
|
+
@dimensions = training_data[0].size
|
8
|
+
@iteration_count = 1
|
9
|
+
|
10
|
+
# Options
|
7
11
|
@number_of_nodes = options[:nodes] || 5
|
8
|
-
@dimensions = options[:dimensions]
|
9
12
|
@learning_rate = options[:learning_rate] || 0.5
|
10
13
|
@radius = options[:radius] || @number_of_nodes / 2
|
11
|
-
@iteration_count = 1
|
12
14
|
@max_iterations = options[:max_iterations] || 100
|
15
|
+
|
13
16
|
# TODO: Allow a lambda so we can use different neighborhood functions
|
14
|
-
@neighborhood_function = options[:neighborhood_function] || 1
|
17
|
+
@neighborhood_function = 1 #options[:neighborhood_function] || 1
|
18
|
+
@verbose = options[:verbose]
|
15
19
|
|
16
20
|
create_nodes(training_data)
|
17
21
|
end
|
@@ -28,25 +32,25 @@ class SOM
|
|
28
32
|
place_data_into_buckets(@training_data)
|
29
33
|
end
|
30
34
|
|
31
|
-
# Returns an array of buckets containing the index of the data
|
35
|
+
# Returns an array of buckets containing the index of the training data
|
32
36
|
def inspect
|
33
|
-
nodes.map {|x| x.
|
37
|
+
nodes.map {|x| [x.id, x.bucket] }
|
34
38
|
end
|
35
39
|
|
36
|
-
# Return data from node that is closest to data
|
37
|
-
# You are returned
|
38
|
-
# [
|
39
|
-
# The index is the original index of that that was pumped into the
|
40
|
+
# Return training data from the node that is closest to input data
|
41
|
+
# You are returned an array that looks like:
|
42
|
+
# [node_id, [training_data_index_1, training_data_index_2...]]
|
43
|
+
# The index is the original index of the data that was pumped into the SOM
|
40
44
|
# during the training process
|
41
45
|
def classify(data)
|
42
46
|
closest_node = find_closest_node(data)
|
43
|
-
closest_node.bucket
|
47
|
+
[closest_node.id, closest_node.bucket]
|
44
48
|
end
|
45
49
|
|
46
50
|
# Taken from AI4R SOM library #107
|
47
51
|
def global_distance_error
|
48
52
|
@training_data.inject(0) do |sum, n|
|
49
|
-
sum +
|
53
|
+
sum + find_closest_node_with_distance(n)[1]
|
50
54
|
end
|
51
55
|
end
|
52
56
|
|
@@ -54,17 +58,25 @@ class SOM
|
|
54
58
|
|
55
59
|
def train_it!(data)
|
56
60
|
return false if @iteration_count >= @max_iterations
|
61
|
+
|
62
|
+
print_message("Iteration: #{@iteration_count}")
|
57
63
|
|
58
|
-
data.
|
64
|
+
data.each_with_index do |input, index|
|
65
|
+
print_message("\tLooking at data #{index+1}/#{data.size}")
|
66
|
+
|
59
67
|
# Update closest node
|
68
|
+
print_message("\t\tUpdating closest node")
|
69
|
+
|
60
70
|
closest_node = find_closest_node(input)
|
61
71
|
closest_node.update_weight(@learning_rate, input)
|
62
72
|
|
63
73
|
# Update nodes that are closer than the radius
|
64
74
|
other_nodes = nodes - [closest_node]
|
65
|
-
other_nodes.
|
66
|
-
next if
|
67
|
-
|
75
|
+
other_nodes.each_with_index do |node, index|
|
76
|
+
next if node.distance_from(closest_node.weights) > decayed_radius
|
77
|
+
|
78
|
+
print_message("\t\tUpdating other nodes: #{index+1}/#{other_nodes.size}")
|
79
|
+
|
68
80
|
node.update_weight(@learning_rate, input, neighborhood_function)
|
69
81
|
end
|
70
82
|
end
|
@@ -72,34 +84,37 @@ class SOM
|
|
72
84
|
increase_iteration_count!
|
73
85
|
end
|
74
86
|
|
87
|
+
# This places the training data into its closest node's bucket.
|
75
88
|
def place_data_into_buckets(data)
|
76
89
|
data.each_with_index do |input, index|
|
77
90
|
closest_node = find_closest_node(input)
|
78
|
-
closest_node <<
|
91
|
+
closest_node << index
|
79
92
|
end
|
80
93
|
end
|
81
94
|
|
82
95
|
def decayed_radius
|
83
|
-
@radius - (0.
|
96
|
+
@radius - (0.7 * @radius * @iteration_count / @max_iterations)
|
84
97
|
end
|
85
98
|
|
86
99
|
def decayed_learning_rate
|
87
|
-
@learning_rate - (0.
|
100
|
+
@learning_rate - (0.7 * @learning_rate * @iteration_count / @max_iterations)
|
88
101
|
end
|
89
102
|
|
90
|
-
def increase_iteration_count!
|
91
|
-
@iteration_count += 1
|
92
|
-
end
|
93
|
-
|
94
103
|
def neighborhood_function
|
95
104
|
0.5 * @neighborhood_function * @iteration_count / @max_iterations
|
96
105
|
end
|
97
106
|
|
107
|
+
def increase_iteration_count!
|
108
|
+
@iteration_count += 1
|
109
|
+
end
|
110
|
+
|
98
111
|
def find_closest_node(data)
|
99
|
-
|
112
|
+
find_closest_node_with_distance(data)[0]
|
100
113
|
end
|
101
114
|
|
102
|
-
|
115
|
+
# Finds the closest node to some data and returns the node
|
116
|
+
# and its distance from the data => [node, distance]
|
117
|
+
def find_closest_node_with_distance(data)
|
103
118
|
closest_node = [nodes[0], nodes[0].distance_from(data)]
|
104
119
|
|
105
120
|
nodes[1..-1].each do |node|
|
@@ -110,9 +125,15 @@ class SOM
|
|
110
125
|
end
|
111
126
|
closest_node
|
112
127
|
end
|
113
|
-
|
128
|
+
|
114
129
|
def create_nodes(data)
|
115
|
-
@number_of_nodes.times
|
130
|
+
@number_of_nodes.times do |n|
|
131
|
+
nodes << Node.new(n, @dimensions)
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
def print_message(message)
|
136
|
+
puts message if @verbose == true
|
116
137
|
end
|
117
138
|
|
118
139
|
end
|
data/som.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{som}
|
8
|
-
s.version = "0.0.
|
8
|
+
s.version = "0.0.3"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["reddavis"]
|
12
|
-
s.date = %q{2009-11-
|
12
|
+
s.date = %q{2009-11-30}
|
13
13
|
s.description = %q{A Self Organising Map}
|
14
14
|
s.email = %q{reddavis@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
data/spec/node_spec.rb
CHANGED
@@ -3,18 +3,22 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
|
3
3
|
describe "Node" do
|
4
4
|
describe "Initialization" do
|
5
5
|
before do
|
6
|
-
@a = Node.new(5)
|
6
|
+
@a = Node.new(1, 5)
|
7
7
|
end
|
8
8
|
|
9
9
|
it "should have 5 weights" do
|
10
10
|
@a.weights.size.should == 5
|
11
11
|
end
|
12
|
+
|
13
|
+
it "should have an ID" do
|
14
|
+
@a.id.should == 1
|
15
|
+
end
|
12
16
|
end
|
13
17
|
|
14
18
|
describe "Distance Calculation" do
|
15
19
|
before do
|
16
|
-
@a = Node.new(2)
|
17
|
-
@b = Node.new(2)
|
20
|
+
@a = Node.new(1, 2)
|
21
|
+
@b = Node.new(1, 2)
|
18
22
|
end
|
19
23
|
|
20
24
|
it "should return 0" do
|
@@ -37,7 +41,7 @@ describe "Node" do
|
|
37
41
|
describe "Update Weight" do
|
38
42
|
describe "Closest" do
|
39
43
|
before do
|
40
|
-
@a = Node.new(2)
|
44
|
+
@a = Node.new(1, 2)
|
41
45
|
@data = [1,2]
|
42
46
|
end
|
43
47
|
|
@@ -50,7 +54,7 @@ describe "Node" do
|
|
50
54
|
|
51
55
|
describe "Neighbor" do
|
52
56
|
before do
|
53
|
-
@a = Node.new(2)
|
57
|
+
@a = Node.new(1, 2)
|
54
58
|
@data = [1,2]
|
55
59
|
end
|
56
60
|
|
@@ -64,7 +68,7 @@ describe "Node" do
|
|
64
68
|
|
65
69
|
describe "Bucket" do
|
66
70
|
before do
|
67
|
-
@a = Node.new(2)
|
71
|
+
@a = Node.new(1, 2)
|
68
72
|
end
|
69
73
|
|
70
74
|
it "should put data into the nodes bucket" do
|
data/spec/som_spec.rb
CHANGED
@@ -3,8 +3,8 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
|
3
3
|
describe "Som" do
|
4
4
|
describe "Training" do
|
5
5
|
before do
|
6
|
-
@data = [[0,0]
|
7
|
-
@a = SOM.new(@data, :nodes => 1
|
6
|
+
@data = [[0,0]]
|
7
|
+
@a = SOM.new(@data, :nodes => 1)
|
8
8
|
end
|
9
9
|
|
10
10
|
it "should change the weight of the best matching node" do
|
@@ -22,31 +22,32 @@ describe "Som" do
|
|
22
22
|
|
23
23
|
it "should preserve data indexes" do
|
24
24
|
@a.train
|
25
|
-
|
26
|
-
index_returned = @a.nodes[0].bucket[0]
|
27
|
-
|
28
|
-
|
29
|
-
@data[index_returned].should == data_returned
|
25
|
+
|
26
|
+
index_returned = @a.nodes[0].bucket[0]
|
27
|
+
index_returned.should == 0
|
30
28
|
end
|
31
29
|
end
|
32
30
|
|
33
31
|
describe "Inspect" do
|
34
32
|
before do
|
35
33
|
data = [[2,3]]
|
36
|
-
@a = SOM.new(data, :nodes => 1
|
34
|
+
@a = SOM.new(data, :nodes => 1)
|
35
|
+
@a.train
|
36
|
+
end
|
37
|
+
|
38
|
+
it "should return the id of the nodes" do
|
39
|
+
@a.inspect[0][0].should == 0
|
37
40
|
end
|
38
41
|
|
39
42
|
it "should show the clusters of data indexes" do
|
40
|
-
@a.
|
41
|
-
@a.inspect.should be_an(Array)
|
42
|
-
@a.inspect.size.should == 1
|
43
|
+
@a.inspect[0][1].should be_an(Array)
|
43
44
|
end
|
44
45
|
end
|
45
46
|
|
46
47
|
describe "Clustering" do
|
47
48
|
before do
|
48
49
|
data = [[0,0], [999,999]]
|
49
|
-
@a = SOM.new(data, :nodes => 2
|
50
|
+
@a = SOM.new(data, :nodes => 2)
|
50
51
|
end
|
51
52
|
|
52
53
|
it "should belong to 2 seperate nodes" do
|
@@ -59,20 +60,29 @@ describe "Som" do
|
|
59
60
|
describe "Classify" do
|
60
61
|
before do
|
61
62
|
data = [[0,0], [999,999]]
|
62
|
-
|
63
|
+
a = SOM.new(data, :nodes => 1)
|
64
|
+
a.train
|
65
|
+
@a = a.classify([1,1])
|
63
66
|
end
|
64
67
|
|
65
68
|
it "should belong to 2 seperate nodes" do
|
66
|
-
@a.
|
67
|
-
@a.
|
68
|
-
|
69
|
+
@a.should be_an(Array)
|
70
|
+
@a.size.should == 2
|
71
|
+
end
|
72
|
+
|
73
|
+
it "should return a node id" do
|
74
|
+
@a[0].should == 0
|
75
|
+
end
|
76
|
+
|
77
|
+
it "should return an array of training_data ids" do
|
78
|
+
@a[1].should be_an(Array)
|
69
79
|
end
|
70
80
|
end
|
71
81
|
|
72
82
|
describe "Global Distance Error" do
|
73
83
|
before do
|
74
84
|
data = [[0,0], [999,999]]
|
75
|
-
@a = SOM.new(data, :nodes => 2
|
85
|
+
@a = SOM.new(data, :nodes => 2)
|
76
86
|
end
|
77
87
|
|
78
88
|
it "should return an integer" do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: som
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- reddavis
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-11-
|
12
|
+
date: 2009-11-30 00:00:00 +00:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|