som 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -1,10 +1,10 @@
1
1
  = SOM - Self Organising Map
2
2
 
3
- A pure Ruby implementation of the Self Organising Map machine learning Algorithm.
3
+ A pure Ruby implementation of the Self Organising Map machine learning algorithm.
4
4
 
5
5
  == Install
6
6
 
7
- gem sources -a -http://gemcutter.org
7
+ gem sources -a http://gemcutter.org
8
8
  sudo gem install som
9
9
 
10
10
  == How To Use
@@ -16,11 +16,30 @@ A pure Ruby implementation of the Self Organising Map machine learning Algorithm
16
16
 
17
17
  a = SOM.new(data, :nodes => 4, :dimensions => 3)
18
18
  a.train
19
+
20
+ # To see which class a new piece of data fits into
21
+ new_data = [9,8,7]
22
+
23
+ # An array is returned containing the index of the
24
+ # training data that fits into the same class
25
+ # The index is the same as the index in the training data e.g:
26
+ # data[index_returned_by_SOM (2)] == data[2]
27
+ a.classify(new_data)
28
+ #=> [node_index, [training_data_index_1, training_data_index_2...]]
19
29
 
20
- # Returns the index of the data you gave it
30
+ # Returns the id of a node and the
31
+ # index of the data that belongs to it
21
32
  a.inspect
22
- #=> [[1, 0...], [99, 84...], [11, 23...], [2, 6...]]
33
+ #=> [[0, [1, 0...]], [1, [99, 84...]], [2, [11, 23...]]]
34
+
35
+ == Options
36
+
37
+ SOM.new(data, :nodes => 1, #Default: 5
38
+ :learning_rate => 0.7, #Default: 0.5
39
+ :radius => 1, #Default: number_of_nodes / 2
40
+ :max_iterations => 100, #Default: 100
41
+ :verbose => true) #Default: false
23
42
 
24
43
  == Copyright
25
44
 
26
- Copyright (c) 2009 Red Davis. See LICENSE for details.
45
+ Copyright (c) 2009 Red Davis. See LICENSE for details.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.2
1
+ 0.0.3
data/examples/example.rb CHANGED
@@ -5,6 +5,8 @@ require 'benchmark'
5
5
  require 'rubygems'
6
6
  require 'normalizer'
7
7
 
8
+ #SOM_DATA = Array.new(100) { Array.new(50) {rand}}
9
+
8
10
  min, max = Normalizer.find_min_and_max(SOM_DATA)
9
11
 
10
12
  normalizer = Normalizer.new(:min => min, :max => max)
@@ -15,9 +17,8 @@ SOM_DATA.each do |n|
15
17
  data << normalizer.normalize(n)
16
18
  end
17
19
 
18
- a = SOM.new(data, :nodes => 10, :dimensions => data[0].size)
19
-
20
- puts a.nodes.inspect
20
+ a = SOM.new(data, :nodes => 8,
21
+ :radius => 0.8)
21
22
 
22
23
  puts a.global_distance_error
23
24
 
@@ -27,4 +28,6 @@ end
27
28
 
28
29
  puts a.global_distance_error
29
30
 
31
+ #puts a.nodes.inspect
32
+
30
33
  puts times
data/lib/som/node.rb CHANGED
@@ -1,9 +1,10 @@
1
1
  class Node
2
2
 
3
- attr_reader :bucket
3
+ attr_reader :bucket, :id
4
4
 
5
- def initialize(number_of_weights)
5
+ def initialize(id, number_of_weights)
6
6
  create_weights(number_of_weights)
7
+ @id = id
7
8
  @bucket = []
8
9
  end
9
10
 
data/lib/som.rb CHANGED
@@ -4,14 +4,18 @@ class SOM
4
4
 
5
5
  def initialize(training_data, options={})
6
6
  @training_data = training_data
7
+ @dimensions = training_data[0].size
8
+ @iteration_count = 1
9
+
10
+ # Options
7
11
  @number_of_nodes = options[:nodes] || 5
8
- @dimensions = options[:dimensions]
9
12
  @learning_rate = options[:learning_rate] || 0.5
10
13
  @radius = options[:radius] || @number_of_nodes / 2
11
- @iteration_count = 1
12
14
  @max_iterations = options[:max_iterations] || 100
15
+
13
16
  # TODO: Allow a lambda so we can use different neighborhood functions
14
- @neighborhood_function = options[:neighborhood_function] || 1
17
+ @neighborhood_function = 1 #options[:neighborhood_function] || 1
18
+ @verbose = options[:verbose]
15
19
 
16
20
  create_nodes(training_data)
17
21
  end
@@ -28,25 +32,25 @@ class SOM
28
32
  place_data_into_buckets(@training_data)
29
33
  end
30
34
 
31
- # Returns an array of buckets containing the index of the data given
35
+ # Returns an array of buckets containing the index of the training data
32
36
  def inspect
33
- nodes.map {|x| x.bucket.map {|x| x[0]}}
37
+ nodes.map {|x| [x.id, x.bucket] }
34
38
  end
35
39
 
36
- # Return data from node that is closest to data
37
- # You are returned a bucket which contains arrays that look like:
38
- # [index, [data]]
39
- # The index is the original index of that that was pumped into the classifier
40
+ # Return training data from the node that is closest to input data
41
+ # You are returned an array that looks like:
42
+ # [node_id, [training_data_index_1, training_data_index_2...]]
43
+ # The index is the original index of the data that was pumped into the SOM
40
44
  # during the training process
41
45
  def classify(data)
42
46
  closest_node = find_closest_node(data)
43
- closest_node.bucket
47
+ [closest_node.id, closest_node.bucket]
44
48
  end
45
49
 
46
50
  # Taken from AI4R SOM library #107
47
51
  def global_distance_error
48
52
  @training_data.inject(0) do |sum, n|
49
- sum + find_closest(n)[1]
53
+ sum + find_closest_node_with_distance(n)[1]
50
54
  end
51
55
  end
52
56
 
@@ -54,17 +58,25 @@ class SOM
54
58
 
55
59
  def train_it!(data)
56
60
  return false if @iteration_count >= @max_iterations
61
+
62
+ print_message("Iteration: #{@iteration_count}")
57
63
 
58
- data.each do |input|
64
+ data.each_with_index do |input, index|
65
+ print_message("\tLooking at data #{index+1}/#{data.size}")
66
+
59
67
  # Update closest node
68
+ print_message("\t\tUpdating closest node")
69
+
60
70
  closest_node = find_closest_node(input)
61
71
  closest_node.update_weight(@learning_rate, input)
62
72
 
63
73
  # Update nodes that are closer than the radius
64
74
  other_nodes = nodes - [closest_node]
65
- other_nodes.each do |node|
66
- next if decayed_radius > node.distance_from(closest_node.weights)
67
-
75
+ other_nodes.each_with_index do |node, index|
76
+ next if node.distance_from(closest_node.weights) > decayed_radius
77
+
78
+ print_message("\t\tUpdating other nodes: #{index+1}/#{other_nodes.size}")
79
+
68
80
  node.update_weight(@learning_rate, input, neighborhood_function)
69
81
  end
70
82
  end
@@ -72,34 +84,37 @@ class SOM
72
84
  increase_iteration_count!
73
85
  end
74
86
 
87
+ # This places the training data into its closest node's bucket.
75
88
  def place_data_into_buckets(data)
76
89
  data.each_with_index do |input, index|
77
90
  closest_node = find_closest_node(input)
78
- closest_node << [index, input]
91
+ closest_node << index
79
92
  end
80
93
  end
81
94
 
82
95
  def decayed_radius
83
- @radius - (0.1 * @radius * @iteration_count / @max_iterations)
96
+ @radius - (0.7 * @radius * @iteration_count / @max_iterations)
84
97
  end
85
98
 
86
99
  def decayed_learning_rate
87
- @learning_rate - (0.5 * @learning_rate * @iteration_count / @max_iterations)
100
+ @learning_rate - (0.7 * @learning_rate * @iteration_count / @max_iterations)
88
101
  end
89
102
 
90
- def increase_iteration_count!
91
- @iteration_count += 1
92
- end
93
-
94
103
  def neighborhood_function
95
104
  0.5 * @neighborhood_function * @iteration_count / @max_iterations
96
105
  end
97
106
 
107
+ def increase_iteration_count!
108
+ @iteration_count += 1
109
+ end
110
+
98
111
  def find_closest_node(data)
99
- find_closest(data)[0]
112
+ find_closest_node_with_distance(data)[0]
100
113
  end
101
114
 
102
- def find_closest(data)
115
+ # Finds the closest node to some data and returns the node
116
+ # and its distance from the data => [node, distance]
117
+ def find_closest_node_with_distance(data)
103
118
  closest_node = [nodes[0], nodes[0].distance_from(data)]
104
119
 
105
120
  nodes[1..-1].each do |node|
@@ -110,9 +125,15 @@ class SOM
110
125
  end
111
126
  closest_node
112
127
  end
113
-
128
+
114
129
  def create_nodes(data)
115
- @number_of_nodes.times { nodes << Node.new(@dimensions) }
130
+ @number_of_nodes.times do |n|
131
+ nodes << Node.new(n, @dimensions)
132
+ end
133
+ end
134
+
135
+ def print_message(message)
136
+ puts message if @verbose == true
116
137
  end
117
138
 
118
139
  end
data/som.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{som}
8
- s.version = "0.0.2"
8
+ s.version = "0.0.3"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["reddavis"]
12
- s.date = %q{2009-11-29}
12
+ s.date = %q{2009-11-30}
13
13
  s.description = %q{A Self Organising Map}
14
14
  s.email = %q{reddavis@gmail.com}
15
15
  s.extra_rdoc_files = [
data/spec/node_spec.rb CHANGED
@@ -3,18 +3,22 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
3
3
  describe "Node" do
4
4
  describe "Initialization" do
5
5
  before do
6
- @a = Node.new(5)
6
+ @a = Node.new(1, 5)
7
7
  end
8
8
 
9
9
  it "should have 5 weights" do
10
10
  @a.weights.size.should == 5
11
11
  end
12
+
13
+ it "should have an ID" do
14
+ @a.id.should == 1
15
+ end
12
16
  end
13
17
 
14
18
  describe "Distance Calculation" do
15
19
  before do
16
- @a = Node.new(2)
17
- @b = Node.new(2)
20
+ @a = Node.new(1, 2)
21
+ @b = Node.new(1, 2)
18
22
  end
19
23
 
20
24
  it "should return 0" do
@@ -37,7 +41,7 @@ describe "Node" do
37
41
  describe "Update Weight" do
38
42
  describe "Closest" do
39
43
  before do
40
- @a = Node.new(2)
44
+ @a = Node.new(1, 2)
41
45
  @data = [1,2]
42
46
  end
43
47
 
@@ -50,7 +54,7 @@ describe "Node" do
50
54
 
51
55
  describe "Neighbor" do
52
56
  before do
53
- @a = Node.new(2)
57
+ @a = Node.new(1, 2)
54
58
  @data = [1,2]
55
59
  end
56
60
 
@@ -64,7 +68,7 @@ describe "Node" do
64
68
 
65
69
  describe "Bucket" do
66
70
  before do
67
- @a = Node.new(2)
71
+ @a = Node.new(1, 2)
68
72
  end
69
73
 
70
74
  it "should put data into the nodes bucket" do
data/spec/som_spec.rb CHANGED
@@ -3,8 +3,8 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
3
3
  describe "Som" do
4
4
  describe "Training" do
5
5
  before do
6
- @data = [[0,0], [0,0.5], [2,4], [6,5]]
7
- @a = SOM.new(@data, :nodes => 1, :dimensions => 2)
6
+ @data = [[0,0]]
7
+ @a = SOM.new(@data, :nodes => 1)
8
8
  end
9
9
 
10
10
  it "should change the weight of the best matching node" do
@@ -22,31 +22,32 @@ describe "Som" do
22
22
 
23
23
  it "should preserve data indexes" do
24
24
  @a.train
25
-
26
- index_returned = @a.nodes[0].bucket[0][0]
27
- data_returned = @a.nodes[0].bucket[0][1]
28
-
29
- @data[index_returned].should == data_returned
25
+
26
+ index_returned = @a.nodes[0].bucket[0]
27
+ index_returned.should == 0
30
28
  end
31
29
  end
32
30
 
33
31
  describe "Inspect" do
34
32
  before do
35
33
  data = [[2,3]]
36
- @a = SOM.new(data, :nodes => 1, :dimensions => 2)
34
+ @a = SOM.new(data, :nodes => 1)
35
+ @a.train
36
+ end
37
+
38
+ it "should return the id of the nodes" do
39
+ @a.inspect[0][0].should == 0
37
40
  end
38
41
 
39
42
  it "should show the clusters of data indexes" do
40
- @a.train
41
- @a.inspect.should be_an(Array)
42
- @a.inspect.size.should == 1
43
+ @a.inspect[0][1].should be_an(Array)
43
44
  end
44
45
  end
45
46
 
46
47
  describe "Clustering" do
47
48
  before do
48
49
  data = [[0,0], [999,999]]
49
- @a = SOM.new(data, :nodes => 2, :dimensions => 2)
50
+ @a = SOM.new(data, :nodes => 2)
50
51
  end
51
52
 
52
53
  it "should belong to 2 seperate nodes" do
@@ -59,20 +60,29 @@ describe "Som" do
59
60
  describe "Classify" do
60
61
  before do
61
62
  data = [[0,0], [999,999]]
62
- @a = SOM.new(data, :nodes => 2, :dimensions => 2)
63
+ a = SOM.new(data, :nodes => 1)
64
+ a.train
65
+ @a = a.classify([1,1])
63
66
  end
64
67
 
65
68
  it "should belong to 2 seperate nodes" do
66
- @a.train
67
- @a.classify([1,1]).should be_an(Array)
68
- @a.classify([1,1]).size.should == 1
69
+ @a.should be_an(Array)
70
+ @a.size.should == 2
71
+ end
72
+
73
+ it "should return a node id" do
74
+ @a[0].should == 0
75
+ end
76
+
77
+ it "should return an array of training_data ids" do
78
+ @a[1].should be_an(Array)
69
79
  end
70
80
  end
71
81
 
72
82
  describe "Global Distance Error" do
73
83
  before do
74
84
  data = [[0,0], [999,999]]
75
- @a = SOM.new(data, :nodes => 2, :dimensions => 2)
85
+ @a = SOM.new(data, :nodes => 2)
76
86
  end
77
87
 
78
88
  it "should return an integer" do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: som
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - reddavis
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-11-29 00:00:00 +00:00
12
+ date: 2009-11-30 00:00:00 +00:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency