ai4r 1.7 → 1.8
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +5 -0
- data/examples/som/som_data.rb +156 -0
- data/examples/som/som_multi_node_example.rb +22 -0
- data/examples/som/som_single_example.rb +24 -0
- data/lib/ai4r.rb +26 -24
- data/lib/ai4r/classifiers/prism.rb +1 -0
- data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +3 -3
- data/lib/ai4r/som/layer.rb +68 -0
- data/lib/ai4r/som/node.rb +96 -0
- data/lib/ai4r/som/som.rb +155 -0
- data/lib/ai4r/som/two_phase_layer.rb +90 -0
- data/test/classifiers/id3_test.rb +6 -4
- data/test/classifiers/prism_test.rb +5 -3
- data/test/clusterers/average_linkage_test.rb +5 -2
- data/test/clusterers/centroid_linkage_test.rb +5 -2
- data/test/clusterers/complete_linkage_test.rb +5 -2
- data/test/clusterers/median_linkage_test.rb +5 -2
- data/test/clusterers/ward_linkage_test.rb +6 -3
- data/test/clusterers/weighted_average_linkage_test.rb +5 -2
- data/test/som/som_test.rb +97 -0
- metadata +94 -92
- data/examples/clusterers/simple_website_clustering.rb +0 -47
data/README.rdoc
CHANGED
@@ -36,6 +36,11 @@ If you do not want to make it public, send it to me: Sergio Fierens, email addre
|
|
36
36
|
AI4R is an active project. If you are interested in what we are working on,
|
37
37
|
check out the development roadmap: http://wiki.jadeferret.com/AI4R_RoadMap
|
38
38
|
|
39
|
+
= Contributors
|
40
|
+
|
41
|
+
* Thomas Kern (SOM implementation and examples)
|
42
|
+
* Sergio Fierens
|
43
|
+
|
39
44
|
= Disclaimer
|
40
45
|
|
41
46
|
In plain english:
|
@@ -0,0 +1,156 @@
|
|
1
|
+
# data is from the iris dataset (http://archive.ics.uci.edu/ml/datasets/Iris)
|
2
|
+
# it is the full dataset, removing the last column
|
3
|
+
# website provides additional information on the dataset itself (attributes, class distribution, etc)
|
4
|
+
|
5
|
+
SOM_DATA = [
|
6
|
+
[5.1, 3.5, 1.4, 0.2],
|
7
|
+
[4.9, 3.0, 1.4, 0.2],
|
8
|
+
[4.7, 3.2, 1.3, 0.2],
|
9
|
+
[4.6, 3.1, 1.5, 0.2],
|
10
|
+
[5.0, 3.6, 1.4, 0.2],
|
11
|
+
[5.4, 3.9, 1.7, 0.4],
|
12
|
+
[4.6, 3.4, 1.4, 0.3],
|
13
|
+
[5.0, 3.4, 1.5, 0.2],
|
14
|
+
[4.4, 2.9, 1.4, 0.2],
|
15
|
+
[4.9, 3.1, 1.5, 0.1],
|
16
|
+
[5.4, 3.7, 1.5, 0.2],
|
17
|
+
[4.8, 3.4, 1.6, 0.2],
|
18
|
+
[4.8, 3.0, 1.4, 0.1],
|
19
|
+
[4.3, 3.0, 1.1, 0.1],
|
20
|
+
[5.8, 4.0, 1.2, 0.2],
|
21
|
+
[5.7, 4.4, 1.5, 0.4],
|
22
|
+
[5.4, 3.9, 1.3, 0.4],
|
23
|
+
[5.1, 3.5, 1.4, 0.3],
|
24
|
+
[5.7, 3.8, 1.7, 0.3],
|
25
|
+
[5.1, 3.8, 1.5, 0.3],
|
26
|
+
[5.4, 3.4, 1.7, 0.2],
|
27
|
+
[5.1, 3.7, 1.5, 0.4],
|
28
|
+
[4.6, 3.6, 1.0, 0.2],
|
29
|
+
[5.1, 3.3, 1.7, 0.5],
|
30
|
+
[4.8, 3.4, 1.9, 0.2],
|
31
|
+
[5.0, 3.0, 1.6, 0.2],
|
32
|
+
[5.0, 3.4, 1.6, 0.4],
|
33
|
+
[5.2, 3.5, 1.5, 0.2],
|
34
|
+
[5.2, 3.4, 1.4, 0.2],
|
35
|
+
[4.7, 3.2, 1.6, 0.2],
|
36
|
+
[4.8, 3.1, 1.6, 0.2],
|
37
|
+
[5.4, 3.4, 1.5, 0.4],
|
38
|
+
[5.2, 4.1, 1.5, 0.1],
|
39
|
+
[5.5, 4.2, 1.4, 0.2],
|
40
|
+
[4.9, 3.1, 1.5, 0.1],
|
41
|
+
[5.0, 3.2, 1.2, 0.2],
|
42
|
+
[5.5, 3.5, 1.3, 0.2],
|
43
|
+
[4.9, 3.1, 1.5, 0.1],
|
44
|
+
[4.4, 3.0, 1.3, 0.2],
|
45
|
+
[5.1, 3.4, 1.5, 0.2],
|
46
|
+
[5.0, 3.5, 1.3, 0.3],
|
47
|
+
[4.5, 2.3, 1.3, 0.3],
|
48
|
+
[4.4, 3.2, 1.3, 0.2],
|
49
|
+
[5.0, 3.5, 1.6, 0.6],
|
50
|
+
[5.1, 3.8, 1.9, 0.4],
|
51
|
+
[4.8, 3.0, 1.4, 0.3],
|
52
|
+
[5.1, 3.8, 1.6, 0.2],
|
53
|
+
[4.6, 3.2, 1.4, 0.2],
|
54
|
+
[5.3, 3.7, 1.5, 0.2],
|
55
|
+
[5.0, 3.3, 1.4, 0.2],
|
56
|
+
[7.0, 3.2, 4.7, 1.4],
|
57
|
+
[6.4, 3.2, 4.5, 1.5],
|
58
|
+
[6.9, 3.1, 4.9, 1.5],
|
59
|
+
[5.5, 2.3, 4.0, 1.3],
|
60
|
+
[6.5, 2.8, 4.6, 1.5],
|
61
|
+
[5.7, 2.8, 4.5, 1.3],
|
62
|
+
[6.3, 3.3, 4.7, 1.6],
|
63
|
+
[4.9, 2.4, 3.3, 1.0],
|
64
|
+
[6.6, 2.9, 4.6, 1.3],
|
65
|
+
[5.2, 2.7, 3.9, 1.4],
|
66
|
+
[5.0, 2.0, 3.5, 1.0],
|
67
|
+
[5.9, 3.0, 4.2, 1.5],
|
68
|
+
[6.0, 2.2, 4.0, 1.0],
|
69
|
+
[6.1, 2.9, 4.7, 1.4],
|
70
|
+
[5.6, 2.9, 3.6, 1.3],
|
71
|
+
[6.7, 3.1, 4.4, 1.4],
|
72
|
+
[5.6, 3.0, 4.5, 1.5],
|
73
|
+
[5.8, 2.7, 4.1, 1.0],
|
74
|
+
[6.2, 2.2, 4.5, 1.5],
|
75
|
+
[5.6, 2.5, 3.9, 1.1],
|
76
|
+
[5.9, 3.2, 4.8, 1.8],
|
77
|
+
[6.1, 2.8, 4.0, 1.3],
|
78
|
+
[6.3, 2.5, 4.9, 1.5],
|
79
|
+
[6.1, 2.8, 4.7, 1.2],
|
80
|
+
[6.4, 2.9, 4.3, 1.3],
|
81
|
+
[6.6, 3.0, 4.4, 1.4],
|
82
|
+
[6.8, 2.8, 4.8, 1.4],
|
83
|
+
[6.7, 3.0, 5.0, 1.7],
|
84
|
+
[6.0, 2.9, 4.5, 1.5],
|
85
|
+
[5.7, 2.6, 3.5, 1.0],
|
86
|
+
[5.5, 2.4, 3.8, 1.1],
|
87
|
+
[5.5, 2.4, 3.7, 1.0],
|
88
|
+
[5.8, 2.7, 3.9, 1.2],
|
89
|
+
[6.0, 2.7, 5.1, 1.6],
|
90
|
+
[5.4, 3.0, 4.5, 1.5],
|
91
|
+
[6.0, 3.4, 4.5, 1.6],
|
92
|
+
[6.7, 3.1, 4.7, 1.5],
|
93
|
+
[6.3, 2.3, 4.4, 1.3],
|
94
|
+
[5.6, 3.0, 4.1, 1.3],
|
95
|
+
[5.5, 2.5, 4.0, 1.3],
|
96
|
+
[5.5, 2.6, 4.4, 1.2],
|
97
|
+
[6.1, 3.0, 4.6, 1.4],
|
98
|
+
[5.8, 2.6, 4.0, 1.2],
|
99
|
+
[5.0, 2.3, 3.3, 1.0],
|
100
|
+
[5.6, 2.7, 4.2, 1.3],
|
101
|
+
[5.7, 3.0, 4.2, 1.2],
|
102
|
+
[5.7, 2.9, 4.2, 1.3],
|
103
|
+
[6.2, 2.9, 4.3, 1.3],
|
104
|
+
[5.1, 2.5, 3.0, 1.1],
|
105
|
+
[5.7, 2.8, 4.1, 1.3],
|
106
|
+
[6.3, 3.3, 6.0, 2.5],
|
107
|
+
[5.8, 2.7, 5.1, 1.9],
|
108
|
+
[7.1, 3.0, 5.9, 2.1],
|
109
|
+
[6.3, 2.9, 5.6, 1.8],
|
110
|
+
[6.5, 3.0, 5.8, 2.2],
|
111
|
+
[7.6, 3.0, 6.6, 2.1],
|
112
|
+
[4.9, 2.5, 4.5, 1.7],
|
113
|
+
[7.3, 2.9, 6.3, 1.8],
|
114
|
+
[6.7, 2.5, 5.8, 1.8],
|
115
|
+
[7.2, 3.6, 6.1, 2.5],
|
116
|
+
[6.5, 3.2, 5.1, 2.0],
|
117
|
+
[6.4, 2.7, 5.3, 1.9],
|
118
|
+
[6.8, 3.0, 5.5, 2.1],
|
119
|
+
[5.7, 2.5, 5.0, 2.0],
|
120
|
+
[5.8, 2.8, 5.1, 2.4],
|
121
|
+
[6.4, 3.2, 5.3, 2.3],
|
122
|
+
[6.5, 3.0, 5.5, 1.8],
|
123
|
+
[7.7, 3.8, 6.7, 2.2],
|
124
|
+
[7.7, 2.6, 6.9, 2.3],
|
125
|
+
[6.0, 2.2, 5.0, 1.5],
|
126
|
+
[6.9, 3.2, 5.7, 2.3],
|
127
|
+
[5.6, 2.8, 4.9, 2.0],
|
128
|
+
[7.7, 2.8, 6.7, 2.0],
|
129
|
+
[6.3, 2.7, 4.9, 1.8],
|
130
|
+
[6.7, 3.3, 5.7, 2.1],
|
131
|
+
[7.2, 3.2, 6.0, 1.8],
|
132
|
+
[6.2, 2.8, 4.8, 1.8],
|
133
|
+
[6.1, 3.0, 4.9, 1.8],
|
134
|
+
[6.4, 2.8, 5.6, 2.1],
|
135
|
+
[7.2, 3.0, 5.8, 1.6],
|
136
|
+
[7.4, 2.8, 6.1, 1.9],
|
137
|
+
[7.9, 3.8, 6.4, 2.0],
|
138
|
+
[6.4, 2.8, 5.6, 2.2],
|
139
|
+
[6.3, 2.8, 5.1, 1.5],
|
140
|
+
[6.1, 2.6, 5.6, 1.4],
|
141
|
+
[7.7, 3.0, 6.1, 2.3],
|
142
|
+
[6.3, 3.4, 5.6, 2.4],
|
143
|
+
[6.4, 3.1, 5.5, 1.8],
|
144
|
+
[6.0, 3.0, 4.8, 1.8],
|
145
|
+
[6.9, 3.1, 5.4, 2.1],
|
146
|
+
[6.7, 3.1, 5.6, 2.4],
|
147
|
+
[6.9, 3.1, 5.1, 2.3],
|
148
|
+
[5.8, 2.7, 5.1, 1.9],
|
149
|
+
[6.8, 3.2, 5.9, 2.3],
|
150
|
+
[6.7, 3.3, 5.7, 2.5],
|
151
|
+
[6.7, 3.0, 5.2, 2.3],
|
152
|
+
[6.3, 2.5, 5.0, 1.9],
|
153
|
+
[6.5, 3.0, 5.2, 2.0],
|
154
|
+
[6.2, 3.4, 5.4, 2.3],
|
155
|
+
[5.9, 3.0, 5.1, 1.8],
|
156
|
+
]
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# This example shows the impact of the size of a SOM on the global error distance.
# For every map size it prints the error before training, the time needed to
# train on SOM_DATA, and the error after training.
require File.dirname(__FILE__) + '/../../lib/ai4r/som/som'
require File.dirname(__FILE__) + '/som_data'
require 'benchmark'

10.times do |offset|
  nodes = offset + 3 # minimum number of nodes

  puts "Nodes: #{nodes}"
  # The map and its layer must be configured with the same node count,
  # otherwise the map size never changes between iterations.
  som = Ai4r::Som::Som.new 4, nodes, Ai4r::Som::TwoPhaseLayer.new(nodes)
  som.initiate_map

  puts "global error distance: #{som.global_error(SOM_DATA)}"
  puts "\ntraining the som\n"

  times = Benchmark.measure do
    som.train SOM_DATA
  end

  puts "Elapsed time for training: #{times}"
  puts "global error distance: #{som.global_error(SOM_DATA)}\n\n"
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# Trains a single SOM on SOM_DATA and prints the node weights and the global
# error distance before and after training, plus the training time.
require File.dirname(__FILE__) + '/../../lib/ai4r/som/som'
require File.dirname(__FILE__) + '/som_data'
require 'benchmark'

# The layer has to be built with the same node count as the map itself.
nodes = 8

som = Ai4r::Som::Som.new 4, nodes, Ai4r::Som::TwoPhaseLayer.new(nodes)
som.initiate_map

# weights right after the random initialisation
som.nodes.each do |node|
  p node.weights
end

puts "global error distance: #{som.global_error(SOM_DATA)}"
puts "\ntraining the som\n"

times = Benchmark.measure do
  som.train SOM_DATA
end

# weights after training
som.nodes.each do |node|
  p node.weights
end

puts "Elapsed time for training: #{times}"
puts "global error distance: #{som.global_error(SOM_DATA)}\n\n"
|
data/lib/ai4r.rb
CHANGED
@@ -1,29 +1,31 @@
|
|
1
1
|
# Data
|
2
|
-
require "ai4r/data/data_set"
|
3
|
-
require "ai4r/data/statistics"
|
4
|
-
require "ai4r/data/proximity"
|
5
|
-
require "ai4r/data/parameterizable"
|
2
|
+
require File.dirname(__FILE__) + "/ai4r/data/data_set"
|
3
|
+
require File.dirname(__FILE__) + "/ai4r/data/statistics"
|
4
|
+
require File.dirname(__FILE__) + "/ai4r/data/proximity"
|
5
|
+
require File.dirname(__FILE__) + "/ai4r/data/parameterizable"
|
6
6
|
# Clusterers
|
7
|
-
require "ai4r/clusterers/clusterer"
|
8
|
-
require "ai4r/clusterers/k_means"
|
9
|
-
require "ai4r/clusterers/bisecting_k_means"
|
10
|
-
require "ai4r/clusterers/single_linkage"
|
11
|
-
require "ai4r/clusterers/complete_linkage"
|
12
|
-
require "ai4r/clusterers/average_linkage"
|
13
|
-
require "ai4r/clusterers/weighted_average_linkage"
|
14
|
-
require "ai4r/clusterers/centroid_linkage"
|
15
|
-
require "ai4r/clusterers/median_linkage"
|
16
|
-
require "ai4r/clusterers/ward_linkage"
|
17
|
-
require "ai4r/clusterers/diana"
|
7
|
+
require File.dirname(__FILE__) + "/ai4r/clusterers/clusterer"
|
8
|
+
require File.dirname(__FILE__) + "/ai4r/clusterers/k_means"
|
9
|
+
require File.dirname(__FILE__) + "/ai4r/clusterers/bisecting_k_means"
|
10
|
+
require File.dirname(__FILE__) + "/ai4r/clusterers/single_linkage"
|
11
|
+
require File.dirname(__FILE__) + "/ai4r/clusterers/complete_linkage"
|
12
|
+
require File.dirname(__FILE__) + "/ai4r/clusterers/average_linkage"
|
13
|
+
require File.dirname(__FILE__) + "/ai4r/clusterers/weighted_average_linkage"
|
14
|
+
require File.dirname(__FILE__) + "/ai4r/clusterers/centroid_linkage"
|
15
|
+
require File.dirname(__FILE__) + "/ai4r/clusterers/median_linkage"
|
16
|
+
require File.dirname(__FILE__) + "/ai4r/clusterers/ward_linkage"
|
17
|
+
require File.dirname(__FILE__) + "/ai4r/clusterers/diana"
|
18
18
|
# Classifiers
|
19
|
-
require "ai4r/classifiers/classifier"
|
20
|
-
require "ai4r/classifiers/id3"
|
21
|
-
require "ai4r/classifiers/prism"
|
22
|
-
require "ai4r/classifiers/one_r"
|
23
|
-
require "ai4r/classifiers/zero_r"
|
24
|
-
require "ai4r/classifiers/hyperpipes"
|
19
|
+
require File.dirname(__FILE__) + "/ai4r/classifiers/classifier"
|
20
|
+
require File.dirname(__FILE__) + "/ai4r/classifiers/id3"
|
21
|
+
require File.dirname(__FILE__) + "/ai4r/classifiers/prism"
|
22
|
+
require File.dirname(__FILE__) + "/ai4r/classifiers/one_r"
|
23
|
+
require File.dirname(__FILE__) + "/ai4r/classifiers/zero_r"
|
24
|
+
require File.dirname(__FILE__) + "/ai4r/classifiers/hyperpipes"
|
25
25
|
# Neural networks
|
26
|
-
require "ai4r/neural_network/backpropagation"
|
27
|
-
require "ai4r/neural_network/hopfield"
|
26
|
+
require File.dirname(__FILE__) + "/ai4r/neural_network/backpropagation"
|
27
|
+
require File.dirname(__FILE__) + "/ai4r/neural_network/hopfield"
|
28
28
|
# Genetic Algorithms
|
29
|
-
require "ai4r/genetic_algorithm/genetic_algorithm"
|
29
|
+
require File.dirname(__FILE__) + "/ai4r/genetic_algorithm/genetic_algorithm"
|
30
|
+
# SOM
|
31
|
+
require File.dirname(__FILE__) + "/ai4r/som/som"
|
@@ -178,7 +178,7 @@ module Ai4r
|
|
178
178
|
last_token = @data[0]
|
179
179
|
cost = 0
|
180
180
|
@data[1..-1].each do |token|
|
181
|
-
cost += @@costs[last_token][token]
|
181
|
+
cost += @@costs.data_items[last_token][token]
|
182
182
|
last_token = token
|
183
183
|
end
|
184
184
|
@fitness = -1 * cost
|
@@ -220,7 +220,7 @@ module Ai4r
|
|
220
220
|
# In this case, we have implemented edge recombination, which is the
|
221
221
|
# most used reproduction algorithm for the Travelling salesman problem.
|
222
222
|
def self.reproduce(a, b)
|
223
|
-
data_size = @@costs[0].length
|
223
|
+
data_size = @@costs.data_items[0].length
|
224
224
|
available = []
|
225
225
|
0.upto(data_size-1) { |n| available << n }
|
226
226
|
token = a.data[0]
|
@@ -249,7 +249,7 @@ module Ai4r
|
|
249
249
|
# use some problem domain knowledge, to generate a
|
250
250
|
# (probably) better initial solution.
|
251
251
|
def self.seed
|
252
|
-
data_size = @@costs[0].length
|
252
|
+
data_size = @@costs.data_items[0].length
|
253
253
|
available = []
|
254
254
|
0.upto(data_size-1) { |n| available << n }
|
255
255
|
seed = []
|
@@ -0,0 +1,68 @@
|
|
1
|
+
# Author:: Thomas Kern
|
2
|
+
# License:: MPL 1.1
|
3
|
+
# Project:: ai4r
|
4
|
+
# Url:: http://ai4r.rubyforge.org/
|
5
|
+
#
|
6
|
+
# You can redistribute it and/or modify it under the terms of
|
7
|
+
# the Mozilla Public License version 1.1 as published by the
|
8
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
+
|
10
|
+
require File.dirname(__FILE__) + '/../data/parameterizable'
|
11
|
+
|
12
|
+
module Ai4r
|
13
|
+
|
14
|
+
module Som
|
15
|
+
|
16
|
+
# Decay strategy used while training a SOM.
#
# Supplies the decay of the neighborhood radius, of the neighborhood
# influence and of the learning rate. There is a single phase, which is over
# once the Som class has run the configured number of epochs.
#
# = Parameters
# * nodes => number of nodes in the SOM (nodes x nodes). Has to be the same
#   integer that is passed to the SOM.
# * radius => the initial radius of the neighborhood
# * epochs => number of epochs the algorithm runs (integer, 100 by default)
# * learning_rate => the initial learning rate (0.7 by default)
class Layer

  include Ai4r::Data::Parameterizable

  parameters_info :nodes => "number of nodes, has to be equal to the som",
    :epochs => "number of epochs the algorithm has to run",
    :radius => "sets the initial neighborhoud radius"

  # Stores the configuration and derives @time_for_epoch, the divisor used
  # by both linear decays below.
  # Raises a RuntimeError ("Too few nodes") when nodes is smaller than 3.
  def initialize(nodes, radius, epochs = 100, learning_rate = 0.7)
    raise("Too few nodes") if nodes < 3

    @nodes = nodes
    @radius = radius
    @epochs = epochs
    @initial_learning_rate = learning_rate

    # The decays divide the epoch by this value, so it is bumped to
    # epochs + 1.0 whenever the computed value is smaller than epochs.
    horizon = @epochs / Math.log(nodes / 4.0)
    @time_for_epoch = horizon < @epochs ? @epochs + 1.0 : horizon
  end

  # Influence of the winning node on a node that is +distance+ away,
  # given the +radius+ of the current epoch: exp(-(distance^2 / 2 / radius^2)).
  def influence_decay(distance, radius)
    d = distance.to_f
    r = radius.to_f
    Math.exp(- (d**2 / 2.0 / r**2))
  end

  # Radius for the given epoch: the initial radius scaled by
  # (1 - epoch / @time_for_epoch) and rounded to the nearest integer.
  def radius_decay(epoch)
    remaining = 1 - epoch / @time_for_epoch
    (@radius * remaining).round
  end

  # Learning rate for the given epoch: the initial learning rate scaled by
  # (1 - epoch / @time_for_epoch).
  def learning_rate_decay(epoch)
    remaining = 1 - epoch / @time_for_epoch
    @initial_learning_rate * remaining
  end

end
|
65
|
+
|
66
|
+
end
|
67
|
+
|
68
|
+
end
|
@@ -0,0 +1,96 @@
|
|
1
|
+
# Author:: Thomas Kern
|
2
|
+
# License:: MPL 1.1
|
3
|
+
# Project:: ai4r
|
4
|
+
# Url:: http://ai4r.rubyforge.org/
|
5
|
+
#
|
6
|
+
# You can redistribute it and/or modify it under the terms of
|
7
|
+
# the Mozilla Public License version 1.1 as published by the
|
8
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
+
|
10
|
+
require File.dirname(__FILE__) + '/../data/parameterizable'
|
11
|
+
require File.dirname(__FILE__) + '/layer'
|
12
|
+
|
13
|
+
module Ai4r
|
14
|
+
|
15
|
+
module Som
|
16
|
+
|
17
|
+
# A single unit of the map; the Som class creates (nodes * nodes) of them.
#
# = attributes
#
# * x and y => position of the node in the square map (x is the row,
#   y the column), directly accessible
# * id => the unique and sequential ID of the node
# * weights => the current weights, an array of floats with one entry per
#   input dimension
# * instantiated_weight => copy of the weights as they were first
#   instantiated; this class never changes them afterwards
class Node

  include Ai4r::Data::Parameterizable

  parameters_info :weights => "holds the current weight",
    :instantiated_weight => "holds the very first weight",
    :x => "holds the row ID of the unit in the map",
    :y => "holds the column ID of the unit in the map",
    :id => "id of the node"

  # Builds a node with random weights.
  # * id => unique, sequential ID of the node
  # * total => number of nodes per row/column of the square map
  # * dimensions => dimension of the input vector
  def self.create(id, total, dimensions)
    n = Node.new
    n.id = id
    n.instantiate_weight dimensions
    # ids run row by row (id = row * total + column), which is also how
    # Som#get_node(x, y) indexes the node list: x is the row, y the column.
    n.x, n.y = id.divmod(total)
    n
  end

  # Fills the weights with +dimensions+ random values and keeps a copy of
  # them in @instantiated_weight.
  def instantiate_weight(dimensions)
    @weights = Array.new(dimensions) { rand }
    @instantiated_weight = @weights.dup
  end

  # Returns the Euclidean distance between the current weights and +input+
  # (the square root of the sum of squared differences).
  # +input+ is a vector/array of the same size as the weights.
  def distance_to_input(input)
    squared_sum = 0
    input.each_with_index do |value, index|
      squared_sum += (value - @weights[index]) ** 2
    end

    Math.sqrt(squared_sum)
  end

  # Returns the distance on the map from this node to the passed node,
  # measured in "rings" around this node. Example:
  #   2 2 2 2 2
  #   2 1 1 1 2
  #   2 1 0 1 2
  #   2 1 1 1 2
  #   2 2 2 2 2
  # 0 being the current node
  def distance_to_node(node)
    max((self.x - node.x).abs, (self.y - node.y).abs)
  end

  private

  # larger of the two values
  def max(a, b)
    a > b ? a : b
  end

end
|
93
|
+
|
94
|
+
end
|
95
|
+
|
96
|
+
end
|
data/lib/ai4r/som/som.rb
ADDED
@@ -0,0 +1,155 @@
|
|
1
|
+
# Author:: Thomas Kern
|
2
|
+
# License:: MPL 1.1
|
3
|
+
# Project:: ai4r
|
4
|
+
# Url:: http://ai4r.rubyforge.org/
|
5
|
+
#
|
6
|
+
# You can redistribute it and/or modify it under the terms of
|
7
|
+
# the Mozilla Public License version 1.1 as published by the
|
8
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
+
|
10
|
+
require File.dirname(__FILE__) + '/../data/parameterizable'
|
11
|
+
require File.dirname(__FILE__) + '/layer'
|
12
|
+
require File.dirname(__FILE__) + '/two_phase_layer'
|
13
|
+
require File.dirname(__FILE__) + '/node'
|
14
|
+
|
15
|
+
module Ai4r
|
16
|
+
|
17
|
+
# A self-organizing map (SOM) or self-organizing feature map (SOFM) is a type
|
18
|
+
# of artificial neural network that is trained using unsupervised learning to
|
19
|
+
# produce a low-dimensional (typically two-dimensional), discretized
|
20
|
+
# representation of the input space of the training samples, called a map.
|
21
|
+
|
22
|
+
# for more have a look at http://en.wikipedia.org/wiki/Self-organizing_map
|
23
|
+
# an in-depth explanation is provided by Sandhya Samarasinghe in
|
24
|
+
# 'Neural Networks for Applied Sciences and Engineering'
|
25
|
+
|
26
|
+
module Som
|
27
|
+
|
28
|
+
# = Introduction
#
# This is an implementation of a Kohonen Self-Organizing Map.
#
# = Features
#
# * Square map of number_of_nodes x number_of_nodes nodes
# * The radius, learning rate and influence decay are delegated to a layer
#   object, so the training schedule can be exchanged
# * Training in one call (train) or one epoch at a time (train_step)
# * 100% ruby code, no external dependency
#
# = Parameters
# * dim => dimension of the input vector
# * number_of_nodes => the number of nodes per row/column (square som)
# * layer => instance of a layer-algorithm class
#
# = About the project
# Author::    Thomas Kern
# License::   MPL 1.1
# Url::       http://ai4r.rubyforge.org
class Som

  include Ai4r::Data::Parameterizable

  parameters_info :nodes => "sets the architecture of the map (nodes x nodes)",
    :dimension => "sets the dimension of the input",
    :layer => "instance of a layer, defines how the training algorithm works",
    :epoch => "number of finished epochs"

  # Only reserves the slots for the nodes; call initiate_map to create them.
  def initialize(dim, number_of_nodes, layer)
    @dimension = dim
    @layer = layer
    @number_of_nodes = number_of_nodes
    @epoch = 0
    @cache = {} # not read anywhere in this class
    @nodes = Array.new(number_of_nodes * number_of_nodes)
  end

  # Finds the best matching unit (bmu) for +input+ among all nodes.
  # Returns [node, distance], where distance is the Euclidean distance
  # between the input and the node's weights (not a neighborhood distance).
  # When several nodes are equally close, the last one wins.
  def find_bmu(input)
    closest = @nodes.first
    closest_dist = closest.distance_to_input input
    @nodes.drop(1).each do |candidate|
      candidate_dist = candidate.distance_to_input(input)
      next unless candidate_dist <= closest_dist

      closest_dist = candidate_dist
      closest = candidate
    end
    [closest, closest_dist]
  end

  # Moves the weights of every node whose map distance to the bmu is
  # smaller than +radius+ towards +input+. +bmu+ is the pair returned by
  # find_bmu.
  def adjust_nodes(input, bmu, radius, learning_rate)
    @nodes.each do |node|
      map_dist = node.distance_to_node(bmu[0])
      if map_dist < radius
        influence = @layer.influence_decay map_dist, radius
        node.weights.each_with_index do |weight, index|
          node.weights[index] += influence * learning_rate * (input[index] - weight)
        end
      end
    end
  end

  # Main method of the som: trains the map with the passed data vectors by
  # calling train_step until it reports that training is finished.
  def train(data)
    finished = false
    finished = train_step(data) until finished
  end

  # Sum over all data entries of the squared distance to their bmu.
  def global_error(data)
    total = 0
    data.each { |entry| total += find_bmu(entry)[1]**2 }
    total
  end

  # Runs one training epoch over +data+.
  # Returns true without training when @epoch has reached the number of
  # epochs of the layer; otherwise trains, adds 1 to @epoch and returns
  # false. Radius and learning rate are requested from the layer once per
  # call/epoch.
  def train_step(data)
    return true if @epoch >= @layer.epochs

    current_radius = @layer.radius_decay @epoch
    current_rate = @layer.learning_rate_decay @epoch

    data.each do |entry|
      adjust_nodes entry, find_bmu(entry), current_radius, current_rate
    end

    @epoch += 1
    false
  end

  # Returns the node at position (x,y) of the square map.
  # Raises an Exception when the position lies outside of the map.
  def get_node(x, y)
    raise(Exception.new) if check_param_for_som(x,y)
    @nodes[x * @number_of_nodes + y]
  end

  # Initiates the map by creating (@number_of_nodes * @number_of_nodes) nodes.
  def initiate_map
    @nodes.each_index do |i|
      @nodes[i] = Node.create i, @number_of_nodes, @dimension
    end
  end

  private

  # Returns true when (x,y) is NOT a valid position in the map.
  # x is the row, y the column indicator.
  def check_param_for_som(x, y)
    upper = @number_of_nodes - 1
    [y, x].any? { |coordinate| coordinate > upper || coordinate < 0 }
  end

end
|
152
|
+
|
153
|
+
end
|
154
|
+
|
155
|
+
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
# Author:: Thomas Kern
|
2
|
+
# License:: MPL 1.1
|
3
|
+
# Project:: ai4r
|
4
|
+
# Url:: http://ai4r.rubyforge.org/
|
5
|
+
#
|
6
|
+
# You can redistribute it and/or modify it under the terms of
|
7
|
+
# the Mozilla Public License version 1.1 as published by the
|
8
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
+
|
10
|
+
require File.dirname(__FILE__) + '/../data/parameterizable'
|
11
|
+
require File.dirname(__FILE__) + '/layer'
|
12
|
+
|
13
|
+
module Ai4r
|
14
|
+
|
15
|
+
module Som
|
16
|
+
|
17
|
+
# Decay strategy with two training phases; extends the class Layer and
# overrides its radius and learning rate decay methods.
#
# Phase one covers the epochs 0...phase_one and decays both the learning
# rate and the radius. Phase two starts at epoch phase_one: the radius is
# fixed to 1 and the learning rate decays steadily towards
# phase_two_learning_rate. The total number of epochs is
# phase_one + phase_two.
#
# Both decay methods keep state between calls (@radius, @lr), so they are
# meant to be called once per epoch, in ascending epoch order.
#
# = Parameters
# * nodes => number of nodes in the SOM (nodes x nodes). Has to be the same
#   number you pass to the SOM. Has to be an integer. The initial radius is
#   derived from it (nodes / 2).
# * learning_rate => sets the initial learning rate (0.9 by default)
# * phase_one => number of epochs for phase one, has to be an integer.
#   By default it is set to 150
# * phase_two => number of epochs for phase two, has to be an integer.
#   By default it is set to 100
# * phase_one_learning_rate => learning rate reached at the end of phase one
#   (0.1 by default)
# * phase_two_learning_rate => learning rate approached during phase two
#   (0 by default)
class TwoPhaseLayer < Layer

  def initialize(nodes, learning_rate = 0.9, phase_one = 150, phase_two = 100,
                 phase_one_learning_rate = 0.1, phase_two_learning_rate = 0)
    super nodes, nodes, phase_one + phase_two, learning_rate
    @phase_one = phase_one
    @phase_two = phase_two
    @lr = @initial_learning_rate

    @phase_one_learning_rate = phase_one_learning_rate
    @phase_two_learning_rate = phase_two_learning_rate

    # NOTE(review): this is usually a Float, so "epoch % @radius_reduction"
    # is only zero at epochs that are whole multiples of it - confirm that
    # this schedule is the intended one.
    @radius_reduction = @phase_one / (nodes/2.0 - 1) + 1
    # to_f: integer rates must not truncate the per-epoch step to 0
    @delta_lr = (@lr - @phase_one_learning_rate).to_f / @phase_one
    @radius = (nodes / 2.0).to_i
  end

  # Returns the neighborhood radius for +epoch+.
  # In phase one the radius is reduced by 1 whenever the epoch is a multiple
  # of @radius_reduction, but never below 1: with a radius of 0 no node,
  # not even the bmu, would be adjusted any more.
  # In phase two (epoch >= phase_one, the same boundary that
  # learning_rate_decay uses) the radius is fixed to 1.
  def radius_decay(epoch)
    return 1 if epoch >= @phase_one

    @radius -= 1 if @radius > 1 && (epoch % @radius_reduction) == 0
    @radius
  end

  # Returns the learning rate for +epoch+.
  # In phase one the rate is reduced by @delta_lr every time this method is
  # called. On the switch of phases (epoch == phase_one) the rate is reset
  # to phase_one_learning_rate and @delta_lr is recomputed for phase two.
  # In phase two the new @delta_lr is used to keep reducing the rate.
  def learning_rate_decay(epoch)
    if epoch < @phase_one
      @lr -= @delta_lr
    elsif epoch == @phase_one
      @lr = @phase_one_learning_rate
      @delta_lr = (@phase_one_learning_rate - @phase_two_learning_rate).to_f / @phase_two
      @lr
    else
      @lr -= @delta_lr
    end
  end

end
|
86
|
+
|
87
|
+
end
|
88
|
+
|
89
|
+
end
|
90
|
+
|
@@ -82,14 +82,16 @@ SPLIT_DATA_ITEMS_BY_AGE = [ [
|
|
82
82
|
"elsif age_range=='>80' then marketing_target='Y'\n"+
|
83
83
|
"else raise 'There was not enough information during training to do a proper induction for this data element' end"
|
84
84
|
|
85
|
-
Ai4r::Classifiers::ID3.send(:public, *Ai4r::Classifiers::ID3.protected_instance_methods)
|
86
|
-
Ai4r::Classifiers::ID3.send(:public, *Ai4r::Classifiers::ID3.private_instance_methods)
|
87
|
-
|
88
85
|
include Ai4r::Classifiers
|
89
86
|
include Ai4r::Data
|
90
87
|
|
91
88
|
class ID3Test < Test::Unit::TestCase
|
92
|
-
|
89
|
+
|
90
|
+
# Makes every protected and private instance method of ID3 public
# (presumably so that the other tests can call them directly - confirm).
def test_build
  id3 = Ai4r::Classifiers::ID3
  id3.send(:public, *id3.protected_instance_methods)
  id3.send(:public, *id3.private_instance_methods)
end
|
94
|
+
|
93
95
|
def test_log2
|
94
96
|
assert_equal 1.0, ID3.log2(2)
|
95
97
|
assert_equal 0.0, ID3.log2(0)
|
@@ -1,14 +1,12 @@
|
|
1
1
|
require 'test/unit'
|
2
2
|
require File.dirname(__FILE__) + '/../../lib/ai4r/classifiers/prism'
|
3
3
|
|
4
|
-
Ai4r::Classifiers::Prism.send(:public, *Ai4r::Classifiers::Prism.protected_instance_methods)
|
5
|
-
Ai4r::Classifiers::Prism.send(:public, *Ai4r::Classifiers::Prism.private_instance_methods)
|
6
4
|
|
7
5
|
class PrismTest < Test::Unit::TestCase
|
8
6
|
|
9
7
|
include Ai4r::Classifiers
|
10
8
|
include Ai4r::Data
|
11
|
-
|
9
|
+
|
12
10
|
@@data_examples = [ ['New York', '<30', 'M', 'Y'],
|
13
11
|
['Chicago', '<30', 'M', 'Y'],
|
14
12
|
['Chicago', '<30', 'F', 'Y'],
|
@@ -42,6 +40,9 @@ class PrismTest < Test::Unit::TestCase
|
|
42
40
|
assert_equal("city", classifier.data_set.data_labels.first)
|
43
41
|
assert_equal("marketing_target", classifier.data_set.data_labels.last)
|
44
42
|
assert !classifier.rules.empty?
|
43
|
+
|
44
|
+
Prism.send(:public, *Prism.protected_instance_methods)
|
45
|
+
Prism.send(:public, *Prism.private_instance_methods)
|
45
46
|
end
|
46
47
|
|
47
48
|
def test_eval
|
@@ -76,6 +77,7 @@ class PrismTest < Test::Unit::TestCase
|
|
76
77
|
def test_matches_conditions
|
77
78
|
classifier = Prism.new.build(DataSet.new(:data_labels => @@data_labels,
|
78
79
|
:data_items => @@data_examples))
|
80
|
+
|
79
81
|
assert classifier.matches_conditions(['New York', '<30', 'M', 'Y'], {"age_range" => "<30"})
|
80
82
|
assert !classifier.matches_conditions(['New York', '<30', 'M', 'Y'], {"age_range" => "[50-80]"})
|
81
83
|
end
|
@@ -12,8 +12,6 @@ require File.dirname(__FILE__) + '/../../lib/ai4r/clusterers/average_linkage'
|
|
12
12
|
|
13
13
|
class Ai4r::Clusterers::AverageLinkage < Ai4r::Clusterers::SingleLinkage
|
14
14
|
attr_accessor :data_set, :number_of_clusters, :clusters, :distance_matrix
|
15
|
-
public :linkage_distance
|
16
|
-
public :distance_between_item_and_cluster
|
17
15
|
end
|
18
16
|
|
19
17
|
class AverageLinkageTest < Test::Unit::TestCase
|
@@ -36,6 +34,11 @@ class AverageLinkageTest < Test::Unit::TestCase
|
|
36
34
|
[68.0, 26.0, 9.0, 0.0, 10.0, 68.0, 5.0, 52.0, 16.0],
|
37
35
|
[49.0, 49.0, 26.0, 5.0, 25.0, 49.0, 4.0, 29.0, 37.0, 5.0],
|
38
36
|
[2.0, 72.0, 65.0, 50.0, 52.0, 2.0, 65.0, 10.0, 74.0, 50.0, 37.0]]
|
37
|
+
|
38
|
+
# Makes the protected instance methods of AverageLinkage public before
# each test.
def setup
  clusterer_class = Ai4r::Clusterers::AverageLinkage
  clusterer_class.send(:public, *clusterer_class.protected_instance_methods)
end
|
39
42
|
|
40
43
|
def test_linkage_distance
|
41
44
|
clusterer = Ai4r::Clusterers::AverageLinkage.new
|
@@ -12,8 +12,6 @@ require File.dirname(__FILE__) + '/../../lib/ai4r/clusterers/centroid_linkage'
|
|
12
12
|
|
13
13
|
class Ai4r::Clusterers::CentroidLinkage
|
14
14
|
attr_accessor :data_set, :number_of_clusters, :clusters, :distance_matrix, :index_clusters
|
15
|
-
public :linkage_distance
|
16
|
-
public :create_initial_index_clusters
|
17
15
|
end
|
18
16
|
|
19
17
|
class Ai4r::Clusterers::CentroidLinkageTest < Test::Unit::TestCase
|
@@ -37,6 +35,11 @@ class Ai4r::Clusterers::CentroidLinkageTest < Test::Unit::TestCase
|
|
37
35
|
[49.0, 49.0, 26.0, 5.0, 25.0, 49.0, 4.0, 29.0, 37.0, 5.0],
|
38
36
|
[2.0, 72.0, 65.0, 50.0, 52.0, 2.0, 65.0, 10.0, 74.0, 50.0, 37.0]]
|
39
37
|
|
38
|
+
def setup
|
39
|
+
Ai4r::Clusterers::CentroidLinkage.send(:public,
|
40
|
+
*Ai4r::Clusterers::CentroidLinkage.protected_instance_methods)
|
41
|
+
end
|
42
|
+
|
40
43
|
def test_linkage_distance
|
41
44
|
clusterer = Ai4r::Clusterers::CentroidLinkage.new
|
42
45
|
clusterer.data_set = DataSet.new :data_items => @@data
|
@@ -12,8 +12,6 @@ require File.dirname(__FILE__) + '/../../lib/ai4r/clusterers/complete_linkage'
|
|
12
12
|
|
13
13
|
class Ai4r::Clusterers::CompleteLinkage
|
14
14
|
attr_accessor :data_set, :number_of_clusters, :clusters, :distance_matrix
|
15
|
-
public :linkage_distance
|
16
|
-
public :distance_between_item_and_cluster
|
17
15
|
end
|
18
16
|
|
19
17
|
class CompleteLinkageTest < Test::Unit::TestCase
|
@@ -36,6 +34,11 @@ class CompleteLinkageTest < Test::Unit::TestCase
|
|
36
34
|
[68.0, 26.0, 9.0, 0.0, 10.0, 68.0, 5.0, 52.0, 16.0],
|
37
35
|
[49.0, 49.0, 26.0, 5.0, 25.0, 49.0, 4.0, 29.0, 37.0, 5.0],
|
38
36
|
[2.0, 72.0, 65.0, 50.0, 52.0, 2.0, 65.0, 10.0, 74.0, 50.0, 37.0]]
|
37
|
+
|
38
|
+
def setup
|
39
|
+
Ai4r::Clusterers::CompleteLinkage.send(:public,
|
40
|
+
*Ai4r::Clusterers::CompleteLinkage.protected_instance_methods)
|
41
|
+
end
|
39
42
|
|
40
43
|
def test_linkage_distance
|
41
44
|
clusterer = Ai4r::Clusterers::CompleteLinkage.new
|
@@ -12,8 +12,6 @@ require File.dirname(__FILE__) + '/../../lib/ai4r/clusterers/median_linkage'
|
|
12
12
|
|
13
13
|
class Ai4r::Clusterers::MedianLinkage
|
14
14
|
attr_accessor :data_set, :number_of_clusters, :clusters, :distance_matrix, :index_clusters
|
15
|
-
public :linkage_distance
|
16
|
-
public :create_initial_index_clusters
|
17
15
|
end
|
18
16
|
|
19
17
|
class Ai4r::Clusterers::MedianLinkageTest < Test::Unit::TestCase
|
@@ -36,6 +34,11 @@ class Ai4r::Clusterers::MedianLinkageTest < Test::Unit::TestCase
|
|
36
34
|
[68.0, 26.0, 9.0, 0.0, 10.0, 68.0, 5.0, 52.0, 16.0],
|
37
35
|
[49.0, 49.0, 26.0, 5.0, 25.0, 49.0, 4.0, 29.0, 37.0, 5.0],
|
38
36
|
[2.0, 72.0, 65.0, 50.0, 52.0, 2.0, 65.0, 10.0, 74.0, 50.0, 37.0]]
|
37
|
+
|
38
|
+
def setup
|
39
|
+
Ai4r::Clusterers::MedianLinkage.send(:public,
|
40
|
+
*Ai4r::Clusterers::MedianLinkage.protected_instance_methods)
|
41
|
+
end
|
39
42
|
|
40
43
|
def test_linkage_distance
|
41
44
|
clusterer = Ai4r::Clusterers::MedianLinkage.new
|
@@ -12,8 +12,6 @@ require File.dirname(__FILE__) + '/../../lib/ai4r/clusterers/ward_linkage'
|
|
12
12
|
|
13
13
|
class Ai4r::Clusterers::WardLinkage
|
14
14
|
attr_accessor :data_set, :number_of_clusters, :clusters, :distance_matrix, :index_clusters
|
15
|
-
public :linkage_distance
|
16
|
-
public :create_initial_index_clusters
|
17
15
|
end
|
18
16
|
|
19
17
|
class Ai4r::Clusterers::WardLinkageTest < Test::Unit::TestCase
|
@@ -36,7 +34,12 @@ class Ai4r::Clusterers::WardLinkageTest < Test::Unit::TestCase
|
|
36
34
|
[68.0, 26.0, 9.0, 0.0, 10.0, 68.0, 5.0, 52.0, 16.0],
|
37
35
|
[49.0, 49.0, 26.0, 5.0, 25.0, 49.0, 4.0, 29.0, 37.0, 5.0],
|
38
36
|
[2.0, 72.0, 65.0, 50.0, 52.0, 2.0, 65.0, 10.0, 74.0, 50.0, 37.0]]
|
39
|
-
|
37
|
+
|
38
|
+
def setup
|
39
|
+
Ai4r::Clusterers::WardLinkage.send(:public,
|
40
|
+
*Ai4r::Clusterers::WardLinkage.protected_instance_methods)
|
41
|
+
end
|
42
|
+
|
40
43
|
def test_linkage_distance
|
41
44
|
clusterer = Ai4r::Clusterers::WardLinkage.new
|
42
45
|
clusterer.data_set = DataSet.new :data_items => @@data
|
@@ -12,8 +12,6 @@ require File.dirname(__FILE__) + '/../../lib/ai4r/clusterers/weighted_average_li
|
|
12
12
|
|
13
13
|
class Ai4r::Clusterers::WeightedAverageLinkage
|
14
14
|
attr_accessor :data_set, :number_of_clusters, :clusters, :distance_matrix, :index_clusters
|
15
|
-
public :linkage_distance
|
16
|
-
public :create_initial_index_clusters
|
17
15
|
end
|
18
16
|
|
19
17
|
class Ai4r::Clusterers::WeightedAverageLinkageTest < Test::Unit::TestCase
|
@@ -36,6 +34,11 @@ class Ai4r::Clusterers::WeightedAverageLinkageTest < Test::Unit::TestCase
|
|
36
34
|
[68.0, 26.0, 9.0, 0.0, 10.0, 68.0, 5.0, 52.0, 16.0],
|
37
35
|
[49.0, 49.0, 26.0, 5.0, 25.0, 49.0, 4.0, 29.0, 37.0, 5.0],
|
38
36
|
[2.0, 72.0, 65.0, 50.0, 52.0, 2.0, 65.0, 10.0, 74.0, 50.0, 37.0]]
|
37
|
+
|
38
|
+
def setup
|
39
|
+
Ai4r::Clusterers::WeightedAverageLinkage.send(:public,
|
40
|
+
*Ai4r::Clusterers::WeightedAverageLinkage.protected_instance_methods)
|
41
|
+
end
|
39
42
|
|
40
43
|
def test_linkage_distance
|
41
44
|
clusterer = Ai4r::Clusterers::WeightedAverageLinkage.new
|
@@ -0,0 +1,97 @@
|
|
1
|
+
# This is a unit test file for the SOM algorithm implemented
|
2
|
+
# in ai4r
|
3
|
+
#
|
4
|
+
# Author:: Thomas Kern
|
5
|
+
# License:: MPL 1.1
|
6
|
+
# Project:: ai4r
|
7
|
+
# Url:: http://ai4r.rubyforge.org/
|
8
|
+
#
|
9
|
+
# You can redistribute it and/or modify it under the terms of
|
10
|
+
# the Mozilla Public License version 1.1 as published by the
|
11
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
12
|
+
|
13
|
+
require File.dirname(__FILE__) + '/../../lib/ai4r/som/som'
|
14
|
+
require 'test/unit'
|
15
|
+
|
16
|
+
module Ai4r
|
17
|
+
|
18
|
+
module Som
|
19
|
+
|
20
|
+
class SomTest < Test::Unit::TestCase
|
21
|
+
|
22
|
+
def setup
|
23
|
+
@som = Som.new 2, 5, Layer.new(3, 3)
|
24
|
+
@som.initiate_map
|
25
|
+
end
|
26
|
+
|
27
|
+
def test_random_initiation
|
28
|
+
assert_equal 25, @som.nodes.length
|
29
|
+
|
30
|
+
@som.nodes.each do |node|
|
31
|
+
assert_equal 2, node.weights.length
|
32
|
+
|
33
|
+
node.weights.each do |weight|
|
34
|
+
assert weight < 1
|
35
|
+
assert weight > 0
|
36
|
+
end
|
37
|
+
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
|
42
|
+
# bmu
|
43
|
+
|
44
|
+
def test_find_bmu
|
45
|
+
bmu = @som.find_bmu([0.5, 0.5])
|
46
|
+
end
|
47
|
+
|
48
|
+
def test_adjust_nodes
|
49
|
+
@som.adjust_nodes [1, 2], @som.find_bmu([0.5, 0.5]), 2, 0.1
|
50
|
+
end
|
51
|
+
|
52
|
+
def test_access_to_nodes
|
53
|
+
assert_raise Exception do
|
54
|
+
@som.get_node(5, 5)
|
55
|
+
end
|
56
|
+
|
57
|
+
assert_raise Exception do
|
58
|
+
@som.get_node(5, -3)
|
59
|
+
end
|
60
|
+
|
61
|
+
assert_equal Node, @som.get_node(0, 0).class
|
62
|
+
end
|
63
|
+
|
64
|
+
def test_distance_for_same_row
|
65
|
+
assert_equal 2, distancer(0, 0, 0, 2)
|
66
|
+
assert_equal 2, distancer(0, 4, 0, 2)
|
67
|
+
assert_equal 0, distancer(0, 0, 0, 0)
|
68
|
+
end
|
69
|
+
|
70
|
+
def test_distance_for_same_column
|
71
|
+
assert_equal 1, distancer(0, 0, 1, 0)
|
72
|
+
assert_equal 2, distancer(2, 0, 0, 0)
|
73
|
+
end
|
74
|
+
|
75
|
+
def test_distance_for_diagonally_point
|
76
|
+
assert_equal 1, distancer(1, 0, 0, 1)
|
77
|
+
assert_equal 2, distancer(2, 2, 0, 0)
|
78
|
+
assert_equal 2, distancer(3, 2, 1, 4)
|
79
|
+
end
|
80
|
+
|
81
|
+
def test_distance_for_screwed_diagonally_point
|
82
|
+
assert_equal 2, distancer(0, 0, 2, 1)
|
83
|
+
assert_equal 4, distancer(3, 4, 1, 0)
|
84
|
+
assert_equal 2, distancer(3, 2, 1, 3)
|
85
|
+
end
|
86
|
+
|
87
|
+
private
|
88
|
+
|
89
|
+
def distancer(x1, y1, x2, y2)
|
90
|
+
@som.get_node(x1, y1).distance_to_node(@som.get_node(x2, y2))
|
91
|
+
end
|
92
|
+
|
93
|
+
end
|
94
|
+
|
95
|
+
end
|
96
|
+
|
97
|
+
end
|
metadata
CHANGED
@@ -1,125 +1,127 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.9.4
|
3
|
+
specification_version: 1
|
2
4
|
name: ai4r
|
3
5
|
version: !ruby/object:Gem::Version
|
4
|
-
version: "1.7"
|
6
|
+
version: "1.8"
|
7
|
+
date: 2009-06-15 00:00:00 +01:00
|
8
|
+
summary: Ruby implementations of algorithms covering several Artificial intelligence fields, including Genetic algorithms, Neural Networks, machine learning, and clustering.
|
9
|
+
require_paths:
|
10
|
+
- lib
|
11
|
+
email: sergio@jadeferret.com
|
12
|
+
homepage: http://ai4r.rubyforge.org
|
13
|
+
rubyforge_project: ai4r
|
14
|
+
description:
|
15
|
+
autorequire:
|
16
|
+
default_executable:
|
17
|
+
bindir: bin
|
18
|
+
has_rdoc: true
|
19
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">"
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.0.0
|
24
|
+
version:
|
5
25
|
platform: ruby
|
26
|
+
signing_key:
|
27
|
+
cert_chain:
|
28
|
+
post_install_message:
|
6
29
|
authors:
|
7
30
|
- Sergio Fierens
|
8
|
-
autorequire:
|
9
|
-
bindir: bin
|
10
|
-
cert_chain: []
|
11
|
-
|
12
|
-
date: 2009-04-29 00:00:00 +01:00
|
13
|
-
default_executable:
|
14
|
-
dependencies: []
|
15
|
-
|
16
|
-
description:
|
17
|
-
email: sergio@jadeferret.com
|
18
|
-
executables: []
|
19
|
-
|
20
|
-
extensions: []
|
21
|
-
|
22
|
-
extra_rdoc_files:
|
23
|
-
- README.rdoc
|
24
31
|
files:
|
25
32
|
- examples/clusterers
|
26
|
-
- examples/clusterers/simple_website_clustering.rb
|
27
|
-
- examples/neural_network
|
28
|
-
- examples/neural_network/backpropagation_example.rb
|
29
|
-
- examples/neural_network/patterns_with_base_noise.rb
|
30
|
-
- examples/neural_network/xor_example.rb
|
31
|
-
- examples/neural_network/patterns_with_noise.rb
|
32
|
-
- examples/neural_network/training_patterns.rb
|
33
33
|
- examples/decision_trees
|
34
34
|
- examples/decision_trees/data_set.csv
|
35
|
-
- examples/decision_trees/results.txt
|
36
35
|
- examples/decision_trees/id3_example.rb
|
36
|
+
- examples/decision_trees/results.txt
|
37
37
|
- examples/genetic_algorithm
|
38
38
|
- examples/genetic_algorithm/genetic_algorithm_example.rb
|
39
39
|
- examples/genetic_algorithm/travel_cost.csv
|
40
|
-
-
|
40
|
+
- examples/neural_network
|
41
|
+
- examples/neural_network/backpropagation_example.rb
|
42
|
+
- examples/neural_network/patterns_with_base_noise.rb
|
43
|
+
- examples/neural_network/patterns_with_noise.rb
|
44
|
+
- examples/neural_network/training_patterns.rb
|
45
|
+
- examples/neural_network/xor_example.rb
|
46
|
+
- examples/som
|
47
|
+
- examples/som/som_data.rb
|
48
|
+
- examples/som/som_multi_node_example.rb
|
49
|
+
- examples/som/som_single_example.rb
|
41
50
|
- lib/ai4r
|
51
|
+
- lib/ai4r/classifiers
|
52
|
+
- lib/ai4r/classifiers/classifier.rb
|
53
|
+
- lib/ai4r/classifiers/hyperpipes.rb
|
54
|
+
- lib/ai4r/classifiers/id3.rb
|
55
|
+
- lib/ai4r/classifiers/multilayer_perceptron.rb
|
56
|
+
- lib/ai4r/classifiers/one_r.rb
|
57
|
+
- lib/ai4r/classifiers/prism.rb
|
58
|
+
- lib/ai4r/classifiers/zero_r.rb
|
42
59
|
- lib/ai4r/clusterers
|
43
60
|
- lib/ai4r/clusterers/average_linkage.rb
|
44
|
-
- lib/ai4r/clusterers/
|
61
|
+
- lib/ai4r/clusterers/bisecting_k_means.rb
|
45
62
|
- lib/ai4r/clusterers/centroid_linkage.rb
|
46
|
-
- lib/ai4r/clusterers/
|
63
|
+
- lib/ai4r/clusterers/clusterer.rb
|
47
64
|
- lib/ai4r/clusterers/complete_linkage.rb
|
48
65
|
- lib/ai4r/clusterers/diana.rb
|
49
|
-
- lib/ai4r/clusterers/bisecting_k_means.rb
|
50
|
-
- lib/ai4r/clusterers/ward_linkage.rb
|
51
|
-
- lib/ai4r/clusterers/single_linkage.rb
|
52
66
|
- lib/ai4r/clusterers/k_means.rb
|
53
|
-
- lib/ai4r/clusterers/
|
67
|
+
- lib/ai4r/clusterers/median_linkage.rb
|
68
|
+
- lib/ai4r/clusterers/single_linkage.rb
|
69
|
+
- lib/ai4r/clusterers/ward_linkage.rb
|
70
|
+
- lib/ai4r/clusterers/weighted_average_linkage.rb
|
71
|
+
- lib/ai4r/data
|
72
|
+
- lib/ai4r/data/data_set.rb
|
73
|
+
- lib/ai4r/data/parameterizable.rb
|
74
|
+
- lib/ai4r/data/proximity.rb
|
75
|
+
- lib/ai4r/data/statistics.rb
|
54
76
|
- lib/ai4r/experiment
|
55
77
|
- lib/ai4r/experiment/classifier_evaluator.rb
|
78
|
+
- lib/ai4r/genetic_algorithm
|
79
|
+
- lib/ai4r/genetic_algorithm/genetic_algorithm.rb
|
56
80
|
- lib/ai4r/neural_network
|
57
81
|
- lib/ai4r/neural_network/backpropagation.rb
|
58
82
|
- lib/ai4r/neural_network/hopfield.rb
|
59
|
-
- lib/ai4r/
|
60
|
-
- lib/ai4r/
|
61
|
-
- lib/ai4r/
|
62
|
-
- lib/ai4r/
|
63
|
-
- lib/ai4r/
|
64
|
-
- lib/ai4r
|
65
|
-
- lib/ai4r/classifiers/classifier.rb
|
66
|
-
- lib/ai4r/classifiers/id3.rb
|
67
|
-
- lib/ai4r/genetic_algorithm
|
68
|
-
- lib/ai4r/genetic_algorithm/genetic_algorithm.rb
|
69
|
-
- lib/ai4r/data
|
70
|
-
- lib/ai4r/data/parameterizable.rb
|
71
|
-
- lib/ai4r/data/statistics.rb
|
72
|
-
- lib/ai4r/data/data_set.rb
|
73
|
-
- lib/ai4r/data/proximity.rb
|
83
|
+
- lib/ai4r/som
|
84
|
+
- lib/ai4r/som/layer.rb
|
85
|
+
- lib/ai4r/som/node.rb
|
86
|
+
- lib/ai4r/som/som.rb
|
87
|
+
- lib/ai4r/som/two_phase_layer.rb
|
88
|
+
- lib/ai4r.rb
|
74
89
|
- README.rdoc
|
75
|
-
has_rdoc: true
|
76
|
-
homepage: http://ai4r.rubyforge.org
|
77
|
-
post_install_message:
|
78
|
-
rdoc_options: []
|
79
|
-
|
80
|
-
require_paths:
|
81
|
-
- lib
|
82
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
83
|
-
requirements:
|
84
|
-
- - ">="
|
85
|
-
- !ruby/object:Gem::Version
|
86
|
-
version: "0"
|
87
|
-
version:
|
88
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
89
|
-
requirements:
|
90
|
-
- - ">="
|
91
|
-
- !ruby/object:Gem::Version
|
92
|
-
version: "0"
|
93
|
-
version:
|
94
|
-
requirements: []
|
95
|
-
|
96
|
-
rubyforge_project: ai4r
|
97
|
-
rubygems_version: 1.3.1
|
98
|
-
signing_key:
|
99
|
-
specification_version: 2
|
100
|
-
summary: Ruby implementations of algorithms covering several Artificial intelligence fields, including Genetic algorithms, Neural Networks, machine learning, and clustering.
|
101
90
|
test_files:
|
102
|
-
- test/
|
103
|
-
- test/
|
104
|
-
- test/
|
91
|
+
- test/classifiers/hyperpipes_test.rb
|
92
|
+
- test/classifiers/id3_test.rb
|
93
|
+
- test/classifiers/multilayer_perceptron_test.rb
|
94
|
+
- test/classifiers/one_r_test.rb
|
95
|
+
- test/classifiers/prism_test.rb
|
96
|
+
- test/classifiers/zero_r_test.rb
|
105
97
|
- test/clusterers/average_linkage_test.rb
|
106
|
-
- test/clusterers/
|
107
|
-
- test/clusterers/ward_linkage_test.rb
|
108
|
-
- test/clusterers/complete_linkage_test.rb
|
98
|
+
- test/clusterers/bisecting_k_means_test.rb
|
109
99
|
- test/clusterers/centroid_linkage_test.rb
|
100
|
+
- test/clusterers/complete_linkage_test.rb
|
101
|
+
- test/clusterers/diana_test.rb
|
110
102
|
- test/clusterers/k_means_test.rb
|
111
|
-
- test/clusterers/
|
103
|
+
- test/clusterers/median_linkage_test.rb
|
104
|
+
- test/clusterers/single_linkage_test.rb
|
105
|
+
- test/clusterers/ward_linkage_test.rb
|
106
|
+
- test/clusterers/weighted_average_linkage_test.rb
|
107
|
+
- test/data/data_set_test.rb
|
108
|
+
- test/data/proximity_test.rb
|
109
|
+
- test/data/statistics_test.rb
|
112
110
|
- test/experiment/classifier_evaluator_test.rb
|
113
|
-
- test/neural_network/hopfield_test.rb
|
114
|
-
- test/neural_network/backpropagation_test.rb
|
115
|
-
- test/classifiers/zero_r_test.rb
|
116
|
-
- test/classifiers/multilayer_perceptron_test.rb
|
117
|
-
- test/classifiers/prism_test.rb
|
118
|
-
- test/classifiers/one_r_test.rb
|
119
|
-
- test/classifiers/hyperpipes_test.rb
|
120
|
-
- test/classifiers/id3_test.rb
|
121
|
-
- test/genetic_algorithm/genetic_algorithm_test.rb
|
122
111
|
- test/genetic_algorithm/chromosome_test.rb
|
123
|
-
- test/
|
124
|
-
- test/
|
125
|
-
- test/
|
112
|
+
- test/genetic_algorithm/genetic_algorithm_test.rb
|
113
|
+
- test/neural_network/backpropagation_test.rb
|
114
|
+
- test/neural_network/hopfield_test.rb
|
115
|
+
- test/som/som_test.rb
|
116
|
+
rdoc_options: []
|
117
|
+
|
118
|
+
extra_rdoc_files:
|
119
|
+
- README.rdoc
|
120
|
+
executables: []
|
121
|
+
|
122
|
+
extensions: []
|
123
|
+
|
124
|
+
requirements: []
|
125
|
+
|
126
|
+
dependencies: []
|
127
|
+
|
@@ -1,47 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/google_search'
|
2
|
-
require File.dirname(__FILE__) + '/build_keywords'
|
3
|
-
require File.dirname(__FILE__) + '/../../lib/ai4r/clusterers/average_linkage'
|
4
|
-
require 'rubygems'
|
5
|
-
require 'hpricot'
|
6
|
-
require 'net/http'
|
7
|
-
require 'benchmark'
|
8
|
-
|
9
|
-
SITES_TO_CLASSIFY = [
|
10
|
-
"www.foxnews.com", "www.usatoday.com", "scm.jadeferret.com",
|
11
|
-
"www.accurev.com", "www.lastminute.com", "subversion.tigris.org",
|
12
|
-
"news.yahoo.com", "news.bbc.co.uk", "www.orbitz.com"
|
13
|
-
]
|
14
|
-
|
15
|
-
# Return array of keywords for the site
|
16
|
-
def get_keywords(site)
|
17
|
-
response = Net::HTTP.get_response(site, "/")
|
18
|
-
Hpricot(response.body).
|
19
|
-
search("meta[@name='keywords']")[0]. #Select meta keywords element
|
20
|
-
attributes["content"]. #Select its content
|
21
|
-
split(","). #Keywords are comma separated
|
22
|
-
collect{ |k| k.strip.downcase } #Remove start and end white spaces
|
23
|
-
end
|
24
|
-
|
25
|
-
# Get keywords data for each website
|
26
|
-
Site = Struct.new("Site", :name, :keywords)
|
27
|
-
sites = SITES_TO_CLASSIFY.collect do |site_name|
|
28
|
-
Site.new(site_name, get_keywords(site_name))
|
29
|
-
end
|
30
|
-
data_set = Ai4r::Data::DataSet.new(:data_items => sites,
|
31
|
-
:data_labels => Site.members)
|
32
|
-
|
33
|
-
# The distance between sites depends on the keywords collected from internet
|
34
|
-
keywords_distance_function = lambda do |x,y|
|
35
|
-
return Ai4r::Data::Proximity.simple_matching(x.keyword, y.keywords)
|
36
|
-
end
|
37
|
-
|
38
|
-
# Create the clusters
|
39
|
-
clusterer = Ai4r::Clusterers::AverageLinkage.new
|
40
|
-
clusterer.distance_function = keywords_distance_function
|
41
|
-
clusterer.build(data_set, 3)
|
42
|
-
|
43
|
-
# Print results
|
44
|
-
clusterer.clusters.each do |cluster|
|
45
|
-
puts cluster.data_items.collect {|item| item.name}.join(", ")
|
46
|
-
puts "============"
|
47
|
-
end
|