som 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -1,10 +1,10 @@
1
1
  = SOM - Self Organising Map
2
2
 
3
- A pure Ruby implementation of the Self Organising Map machine learning Algorithm.
3
+ A pure Ruby implementation of the Self Organising Map machine learning algorithm.
4
4
 
5
5
  == Install
6
6
 
7
- gem sources -a -http://gemcutter.org
7
+ gem sources -a http://gemcutter.org
8
8
  sudo gem install som
9
9
 
10
10
  == How To Use
@@ -16,11 +16,30 @@ A pure Ruby implementation of the Self Organising Map machine learning Algorithm
16
16
 
17
17
  a = SOM.new(data, :number_of_nodes => 4, :dimensions => 3)
18
18
  a.train
19
+
20
+ # To see which class a new piece of data fits into
21
+ new_data = [9,8,7]
22
+
23
+ # An array is returned containing the index of the
24
+ # training data that fits into the same class
25
+ # The index is the same as the index in the training data e.g:
26
+ # data[index_returned_by_SOM (2)] == data[2]
27
+ a.classify(new_data)
28
+ #=> [node_index, [training_data_index_1, training_data_index_2...]]
19
29
 
20
- # Returns the index of the data you gave it
30
+ # Returns the id of a node and the
31
+ # index of the data that belongs to it
21
32
  a.inspect
22
- #=> [[1, 0...], [99, 84...], [11, 23...], [2, 6...]]
33
+ #=> [[0, [1, 0...]], [1, [99, 84...]], [2, [11, 23...]]]
34
+
35
+ == Options
36
+
37
+ SOM.new(data, :number_of_nodes => 1, #Default: 5
38
+ :learning_rate => 0.7, #Default: 0.5
39
+ :radius => 1, #Default: number_of_nodes / 2
40
+ :max_iterations => 100, #Default: 100
41
+ :verbose => true) #Default: false
23
42
 
24
43
  == Copyright
25
44
 
26
- Copyright (c) 2009 Red Davis. See LICENSE for details.
45
+ Copyright (c) 2009 Red Davis. See LICENSE for details.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.2
1
+ 0.0.3
data/examples/example.rb CHANGED
@@ -5,6 +5,8 @@ require 'benchmark'
5
5
  require 'rubygems'
6
6
  require 'normalizer'
7
7
 
8
+ #SOM_DATA = Array.new(100) { Array.new(50) {rand}}
9
+
8
10
  min, max = Normalizer.find_min_and_max(SOM_DATA)
9
11
 
10
12
  normalizer = Normalizer.new(:min => min, :max => max)
@@ -15,9 +17,8 @@ SOM_DATA.each do |n|
15
17
  data << normalizer.normalize(n)
16
18
  end
17
19
 
18
- a = SOM.new(data, :nodes => 10, :dimensions => data[0].size)
19
-
20
- puts a.nodes.inspect
20
+ a = SOM.new(data, :nodes => 8,
21
+ :radius => 0.8)
21
22
 
22
23
  puts a.global_distance_error
23
24
 
@@ -27,4 +28,6 @@ end
27
28
 
28
29
  puts a.global_distance_error
29
30
 
31
+ #puts a.nodes.inspect
32
+
30
33
  puts times
data/lib/som/node.rb CHANGED
@@ -1,9 +1,10 @@
1
1
  class Node
2
2
 
3
- attr_reader :bucket
3
+ attr_reader :bucket, :id
4
4
 
5
- def initialize(number_of_weights)
5
+ def initialize(id, number_of_weights)
6
6
  create_weights(number_of_weights)
7
+ @id = id
7
8
  @bucket = []
8
9
  end
9
10
 
data/lib/som.rb CHANGED
@@ -4,14 +4,18 @@ class SOM
4
4
 
5
5
  def initialize(training_data, options={})
6
6
  @training_data = training_data
7
+ @dimensions = training_data[0].size
8
+ @iteration_count = 1
9
+
10
+ # Options
7
11
  @number_of_nodes = options[:nodes] || 5
8
- @dimensions = options[:dimensions]
9
12
  @learning_rate = options[:learning_rate] || 0.5
10
13
  @radius = options[:radius] || @number_of_nodes / 2
11
- @iteration_count = 1
12
14
  @max_iterations = options[:max_iterations] || 100
15
+
13
16
  # TODO: Allow a lambda so we can use different neighborhood functions
14
- @neighborhood_function = options[:neighborhood_function] || 1
17
+ @neighborhood_function = 1 #options[:neighborhood_function] || 1
18
+ @verbose = options[:verbose]
15
19
 
16
20
  create_nodes(training_data)
17
21
  end
@@ -28,25 +32,25 @@ class SOM
28
32
  place_data_into_buckets(@training_data)
29
33
  end
30
34
 
31
- # Returns an array of buckets containing the index of the data given
35
+ # Returns an array of [node_id, bucket] pairs; each bucket holds the indexes of the training data placed in that node
32
36
  def inspect
33
- nodes.map {|x| x.bucket.map {|x| x[0]}}
37
+ nodes.map {|x| [x.id, x.bucket] }
34
38
  end
35
39
 
36
- # Return data from node that is closest to data
37
- # You are returned a bucket which contains arrays that look like:
38
- # [index, [data]]
39
- # The index is the original index of that that was pumped into the classifier
40
+ # Return training data from the node that is closest to input data
41
+ # You are returned an array that looks like:
42
+ # [node_id, [training_data_index_1, training_data_index_2...]]
43
+ # The index is the original index of the data that was pumped into the SOM
40
44
  # during the training process
41
45
  def classify(data)
42
46
  closest_node = find_closest_node(data)
43
- closest_node.bucket
47
+ [closest_node.id, closest_node.bucket]
44
48
  end
45
49
 
46
50
  # Taken from AI4R SOM library #107
47
51
  def global_distance_error
48
52
  @training_data.inject(0) do |sum, n|
49
- sum + find_closest(n)[1]
53
+ sum + find_closest_node_with_distance(n)[1]
50
54
  end
51
55
  end
52
56
 
@@ -54,17 +58,25 @@ class SOM
54
58
 
55
59
  def train_it!(data)
56
60
  return false if @iteration_count >= @max_iterations
61
+
62
+ print_message("Iteration: #{@iteration_count}")
57
63
 
58
- data.each do |input|
64
+ data.each_with_index do |input, index|
65
+ print_message("\tLooking at data #{index+1}/#{data.size}")
66
+
59
67
  # Update closest node
68
+ print_message("\t\tUpdating closest node")
69
+
60
70
  closest_node = find_closest_node(input)
61
71
  closest_node.update_weight(@learning_rate, input)
62
72
 
63
73
  # Update nodes that are closer than the radius
64
74
  other_nodes = nodes - [closest_node]
65
- other_nodes.each do |node|
66
- next if decayed_radius > node.distance_from(closest_node.weights)
67
-
75
+ other_nodes.each_with_index do |node, index|
76
+ next if node.distance_from(closest_node.weights) > decayed_radius
77
+
78
+ print_message("\t\tUpdating other nodes: #{index+1}/#{other_nodes.size}")
79
+
68
80
  node.update_weight(@learning_rate, input, neighborhood_function)
69
81
  end
70
82
  end
@@ -72,34 +84,37 @@ class SOM
72
84
  increase_iteration_count!
73
85
  end
74
86
 
87
+ # This places the training data into its closest node's bucket.
75
88
  def place_data_into_buckets(data)
76
89
  data.each_with_index do |input, index|
77
90
  closest_node = find_closest_node(input)
78
- closest_node << [index, input]
91
+ closest_node << index
79
92
  end
80
93
  end
81
94
 
82
95
  def decayed_radius
83
- @radius - (0.1 * @radius * @iteration_count / @max_iterations)
96
+ @radius - (0.7 * @radius * @iteration_count / @max_iterations)
84
97
  end
85
98
 
86
99
  def decayed_learning_rate
87
- @learning_rate - (0.5 * @learning_rate * @iteration_count / @max_iterations)
100
+ @learning_rate - (0.7 * @learning_rate * @iteration_count / @max_iterations)
88
101
  end
89
102
 
90
- def increase_iteration_count!
91
- @iteration_count += 1
92
- end
93
-
94
103
  def neighborhood_function
95
104
  0.5 * @neighborhood_function * @iteration_count / @max_iterations
96
105
  end
97
106
 
107
+ def increase_iteration_count!
108
+ @iteration_count += 1
109
+ end
110
+
98
111
  def find_closest_node(data)
99
- find_closest(data)[0]
112
+ find_closest_node_with_distance(data)[0]
100
113
  end
101
114
 
102
- def find_closest(data)
115
+ # Finds the closest node to some data and returns the node
116
+ # and its distance from the data => [node, distance]
117
+ def find_closest_node_with_distance(data)
103
118
  closest_node = [nodes[0], nodes[0].distance_from(data)]
104
119
 
105
120
  nodes[1..-1].each do |node|
@@ -110,9 +125,15 @@ class SOM
110
125
  end
111
126
  closest_node
112
127
  end
113
-
128
+
114
129
  def create_nodes(data)
115
- @number_of_nodes.times { nodes << Node.new(@dimensions) }
130
+ @number_of_nodes.times do |n|
131
+ nodes << Node.new(n, @dimensions)
132
+ end
133
+ end
134
+
135
+ def print_message(message)
136
+ puts message if @verbose == true
116
137
  end
117
138
 
118
139
  end
data/som.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{som}
8
- s.version = "0.0.2"
8
+ s.version = "0.0.3"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["reddavis"]
12
- s.date = %q{2009-11-29}
12
+ s.date = %q{2009-11-30}
13
13
  s.description = %q{A Self Organising Map}
14
14
  s.email = %q{reddavis@gmail.com}
15
15
  s.extra_rdoc_files = [
data/spec/node_spec.rb CHANGED
@@ -3,18 +3,22 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
3
3
  describe "Node" do
4
4
  describe "Initialization" do
5
5
  before do
6
- @a = Node.new(5)
6
+ @a = Node.new(1, 5)
7
7
  end
8
8
 
9
9
  it "should have 5 weights" do
10
10
  @a.weights.size.should == 5
11
11
  end
12
+
13
+ it "should have an ID" do
14
+ @a.id.should == 1
15
+ end
12
16
  end
13
17
 
14
18
  describe "Distance Calculation" do
15
19
  before do
16
- @a = Node.new(2)
17
- @b = Node.new(2)
20
+ @a = Node.new(1, 2)
21
+ @b = Node.new(1, 2)
18
22
  end
19
23
 
20
24
  it "should return 0" do
@@ -37,7 +41,7 @@ describe "Node" do
37
41
  describe "Update Weight" do
38
42
  describe "Closest" do
39
43
  before do
40
- @a = Node.new(2)
44
+ @a = Node.new(1, 2)
41
45
  @data = [1,2]
42
46
  end
43
47
 
@@ -50,7 +54,7 @@ describe "Node" do
50
54
 
51
55
  describe "Neighbor" do
52
56
  before do
53
- @a = Node.new(2)
57
+ @a = Node.new(1, 2)
54
58
  @data = [1,2]
55
59
  end
56
60
 
@@ -64,7 +68,7 @@ describe "Node" do
64
68
 
65
69
  describe "Bucket" do
66
70
  before do
67
- @a = Node.new(2)
71
+ @a = Node.new(1, 2)
68
72
  end
69
73
 
70
74
  it "should put data into the nodes bucket" do
data/spec/som_spec.rb CHANGED
@@ -3,8 +3,8 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
3
3
  describe "Som" do
4
4
  describe "Training" do
5
5
  before do
6
- @data = [[0,0], [0,0.5], [2,4], [6,5]]
7
- @a = SOM.new(@data, :nodes => 1, :dimensions => 2)
6
+ @data = [[0,0]]
7
+ @a = SOM.new(@data, :nodes => 1)
8
8
  end
9
9
 
10
10
  it "should change the weight of the best matching node" do
@@ -22,31 +22,32 @@ describe "Som" do
22
22
 
23
23
  it "should preserve data indexes" do
24
24
  @a.train
25
-
26
- index_returned = @a.nodes[0].bucket[0][0]
27
- data_returned = @a.nodes[0].bucket[0][1]
28
-
29
- @data[index_returned].should == data_returned
25
+
26
+ index_returned = @a.nodes[0].bucket[0]
27
+ index_returned.should == 0
30
28
  end
31
29
  end
32
30
 
33
31
  describe "Inspect" do
34
32
  before do
35
33
  data = [[2,3]]
36
- @a = SOM.new(data, :nodes => 1, :dimensions => 2)
34
+ @a = SOM.new(data, :nodes => 1)
35
+ @a.train
36
+ end
37
+
38
+ it "should return the id of the nodes" do
39
+ @a.inspect[0][0].should == 0
37
40
  end
38
41
 
39
42
  it "should show the clusters of data indexes" do
40
- @a.train
41
- @a.inspect.should be_an(Array)
42
- @a.inspect.size.should == 1
43
+ @a.inspect[0][1].should be_an(Array)
43
44
  end
44
45
  end
45
46
 
46
47
  describe "Clustering" do
47
48
  before do
48
49
  data = [[0,0], [999,999]]
49
- @a = SOM.new(data, :nodes => 2, :dimensions => 2)
50
+ @a = SOM.new(data, :nodes => 2)
50
51
  end
51
52
 
52
53
  it "should belong to 2 seperate nodes" do
@@ -59,20 +60,29 @@ describe "Som" do
59
60
  describe "Classify" do
60
61
  before do
61
62
  data = [[0,0], [999,999]]
62
- @a = SOM.new(data, :nodes => 2, :dimensions => 2)
63
+ a = SOM.new(data, :nodes => 1)
64
+ a.train
65
+ @a = a.classify([1,1])
63
66
  end
64
67
 
65
68
  it "should belong to 2 seperate nodes" do
66
- @a.train
67
- @a.classify([1,1]).should be_an(Array)
68
- @a.classify([1,1]).size.should == 1
69
+ @a.should be_an(Array)
70
+ @a.size.should == 2
71
+ end
72
+
73
+ it "should return a node id" do
74
+ @a[0].should == 0
75
+ end
76
+
77
+ it "should return an array of training_data ids" do
78
+ @a[1].should be_an(Array)
69
79
  end
70
80
  end
71
81
 
72
82
  describe "Global Distance Error" do
73
83
  before do
74
84
  data = [[0,0], [999,999]]
75
- @a = SOM.new(data, :nodes => 2, :dimensions => 2)
85
+ @a = SOM.new(data, :nodes => 2)
76
86
  end
77
87
 
78
88
  it "should return an integer" do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: som
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - reddavis
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-11-29 00:00:00 +00:00
12
+ date: 2009-11-30 00:00:00 +00:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency