ai4r 1.7 → 1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +5 -0
- data/examples/som/som_data.rb +156 -0
- data/examples/som/som_multi_node_example.rb +22 -0
- data/examples/som/som_single_example.rb +24 -0
- data/lib/ai4r.rb +26 -24
- data/lib/ai4r/classifiers/prism.rb +1 -0
- data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +3 -3
- data/lib/ai4r/som/layer.rb +68 -0
- data/lib/ai4r/som/node.rb +96 -0
- data/lib/ai4r/som/som.rb +155 -0
- data/lib/ai4r/som/two_phase_layer.rb +90 -0
- data/test/classifiers/id3_test.rb +6 -4
- data/test/classifiers/prism_test.rb +5 -3
- data/test/clusterers/average_linkage_test.rb +5 -2
- data/test/clusterers/centroid_linkage_test.rb +5 -2
- data/test/clusterers/complete_linkage_test.rb +5 -2
- data/test/clusterers/median_linkage_test.rb +5 -2
- data/test/clusterers/ward_linkage_test.rb +6 -3
- data/test/clusterers/weighted_average_linkage_test.rb +5 -2
- data/test/som/som_test.rb +97 -0
- metadata +94 -92
- data/examples/clusterers/simple_website_clustering.rb +0 -47
data/README.rdoc
CHANGED
@@ -36,6 +36,11 @@ If you do not want to make it public, send it to me: Sergio Fierens, email addre
|
|
36
36
|
AI4R is an active project. If you are interested in what we are working on,
|
37
37
|
check out the development roadmap: http://wiki.jadeferret.com/AI4R_RoadMap
|
38
38
|
|
39
|
+
= Contributors
|
40
|
+
|
41
|
+
* Thomas Kern (SOM implementation and examples)
|
42
|
+
* Sergio Fierens
|
43
|
+
|
39
44
|
= Disclaimer
|
40
45
|
|
41
46
|
In plain english:
|
@@ -0,0 +1,156 @@
|
|
1
|
+
# data is from the iris dataset (http://archive.ics.uci.edu/ml/datasets/Iris)
|
2
|
+
# it is the full dataset, removing the last column
|
3
|
+
# website provides additional information on the dataset itself (attributes, class distribution, etc)
|
4
|
+
|
5
|
+
SOM_DATA = [
|
6
|
+
[5.1, 3.5, 1.4, 0.2],
|
7
|
+
[4.9, 3.0, 1.4, 0.2],
|
8
|
+
[4.7, 3.2, 1.3, 0.2],
|
9
|
+
[4.6, 3.1, 1.5, 0.2],
|
10
|
+
[5.0, 3.6, 1.4, 0.2],
|
11
|
+
[5.4, 3.9, 1.7, 0.4],
|
12
|
+
[4.6, 3.4, 1.4, 0.3],
|
13
|
+
[5.0, 3.4, 1.5, 0.2],
|
14
|
+
[4.4, 2.9, 1.4, 0.2],
|
15
|
+
[4.9, 3.1, 1.5, 0.1],
|
16
|
+
[5.4, 3.7, 1.5, 0.2],
|
17
|
+
[4.8, 3.4, 1.6, 0.2],
|
18
|
+
[4.8, 3.0, 1.4, 0.1],
|
19
|
+
[4.3, 3.0, 1.1, 0.1],
|
20
|
+
[5.8, 4.0, 1.2, 0.2],
|
21
|
+
[5.7, 4.4, 1.5, 0.4],
|
22
|
+
[5.4, 3.9, 1.3, 0.4],
|
23
|
+
[5.1, 3.5, 1.4, 0.3],
|
24
|
+
[5.7, 3.8, 1.7, 0.3],
|
25
|
+
[5.1, 3.8, 1.5, 0.3],
|
26
|
+
[5.4, 3.4, 1.7, 0.2],
|
27
|
+
[5.1, 3.7, 1.5, 0.4],
|
28
|
+
[4.6, 3.6, 1.0, 0.2],
|
29
|
+
[5.1, 3.3, 1.7, 0.5],
|
30
|
+
[4.8, 3.4, 1.9, 0.2],
|
31
|
+
[5.0, 3.0, 1.6, 0.2],
|
32
|
+
[5.0, 3.4, 1.6, 0.4],
|
33
|
+
[5.2, 3.5, 1.5, 0.2],
|
34
|
+
[5.2, 3.4, 1.4, 0.2],
|
35
|
+
[4.7, 3.2, 1.6, 0.2],
|
36
|
+
[4.8, 3.1, 1.6, 0.2],
|
37
|
+
[5.4, 3.4, 1.5, 0.4],
|
38
|
+
[5.2, 4.1, 1.5, 0.1],
|
39
|
+
[5.5, 4.2, 1.4, 0.2],
|
40
|
+
[4.9, 3.1, 1.5, 0.1],
|
41
|
+
[5.0, 3.2, 1.2, 0.2],
|
42
|
+
[5.5, 3.5, 1.3, 0.2],
|
43
|
+
[4.9, 3.1, 1.5, 0.1],
|
44
|
+
[4.4, 3.0, 1.3, 0.2],
|
45
|
+
[5.1, 3.4, 1.5, 0.2],
|
46
|
+
[5.0, 3.5, 1.3, 0.3],
|
47
|
+
[4.5, 2.3, 1.3, 0.3],
|
48
|
+
[4.4, 3.2, 1.3, 0.2],
|
49
|
+
[5.0, 3.5, 1.6, 0.6],
|
50
|
+
[5.1, 3.8, 1.9, 0.4],
|
51
|
+
[4.8, 3.0, 1.4, 0.3],
|
52
|
+
[5.1, 3.8, 1.6, 0.2],
|
53
|
+
[4.6, 3.2, 1.4, 0.2],
|
54
|
+
[5.3, 3.7, 1.5, 0.2],
|
55
|
+
[5.0, 3.3, 1.4, 0.2],
|
56
|
+
[7.0, 3.2, 4.7, 1.4],
|
57
|
+
[6.4, 3.2, 4.5, 1.5],
|
58
|
+
[6.9, 3.1, 4.9, 1.5],
|
59
|
+
[5.5, 2.3, 4.0, 1.3],
|
60
|
+
[6.5, 2.8, 4.6, 1.5],
|
61
|
+
[5.7, 2.8, 4.5, 1.3],
|
62
|
+
[6.3, 3.3, 4.7, 1.6],
|
63
|
+
[4.9, 2.4, 3.3, 1.0],
|
64
|
+
[6.6, 2.9, 4.6, 1.3],
|
65
|
+
[5.2, 2.7, 3.9, 1.4],
|
66
|
+
[5.0, 2.0, 3.5, 1.0],
|
67
|
+
[5.9, 3.0, 4.2, 1.5],
|
68
|
+
[6.0, 2.2, 4.0, 1.0],
|
69
|
+
[6.1, 2.9, 4.7, 1.4],
|
70
|
+
[5.6, 2.9, 3.6, 1.3],
|
71
|
+
[6.7, 3.1, 4.4, 1.4],
|
72
|
+
[5.6, 3.0, 4.5, 1.5],
|
73
|
+
[5.8, 2.7, 4.1, 1.0],
|
74
|
+
[6.2, 2.2, 4.5, 1.5],
|
75
|
+
[5.6, 2.5, 3.9, 1.1],
|
76
|
+
[5.9, 3.2, 4.8, 1.8],
|
77
|
+
[6.1, 2.8, 4.0, 1.3],
|
78
|
+
[6.3, 2.5, 4.9, 1.5],
|
79
|
+
[6.1, 2.8, 4.7, 1.2],
|
80
|
+
[6.4, 2.9, 4.3, 1.3],
|
81
|
+
[6.6, 3.0, 4.4, 1.4],
|
82
|
+
[6.8, 2.8, 4.8, 1.4],
|
83
|
+
[6.7, 3.0, 5.0, 1.7],
|
84
|
+
[6.0, 2.9, 4.5, 1.5],
|
85
|
+
[5.7, 2.6, 3.5, 1.0],
|
86
|
+
[5.5, 2.4, 3.8, 1.1],
|
87
|
+
[5.5, 2.4, 3.7, 1.0],
|
88
|
+
[5.8, 2.7, 3.9, 1.2],
|
89
|
+
[6.0, 2.7, 5.1, 1.6],
|
90
|
+
[5.4, 3.0, 4.5, 1.5],
|
91
|
+
[6.0, 3.4, 4.5, 1.6],
|
92
|
+
[6.7, 3.1, 4.7, 1.5],
|
93
|
+
[6.3, 2.3, 4.4, 1.3],
|
94
|
+
[5.6, 3.0, 4.1, 1.3],
|
95
|
+
[5.5, 2.5, 4.0, 1.3],
|
96
|
+
[5.5, 2.6, 4.4, 1.2],
|
97
|
+
[6.1, 3.0, 4.6, 1.4],
|
98
|
+
[5.8, 2.6, 4.0, 1.2],
|
99
|
+
[5.0, 2.3, 3.3, 1.0],
|
100
|
+
[5.6, 2.7, 4.2, 1.3],
|
101
|
+
[5.7, 3.0, 4.2, 1.2],
|
102
|
+
[5.7, 2.9, 4.2, 1.3],
|
103
|
+
[6.2, 2.9, 4.3, 1.3],
|
104
|
+
[5.1, 2.5, 3.0, 1.1],
|
105
|
+
[5.7, 2.8, 4.1, 1.3],
|
106
|
+
[6.3, 3.3, 6.0, 2.5],
|
107
|
+
[5.8, 2.7, 5.1, 1.9],
|
108
|
+
[7.1, 3.0, 5.9, 2.1],
|
109
|
+
[6.3, 2.9, 5.6, 1.8],
|
110
|
+
[6.5, 3.0, 5.8, 2.2],
|
111
|
+
[7.6, 3.0, 6.6, 2.1],
|
112
|
+
[4.9, 2.5, 4.5, 1.7],
|
113
|
+
[7.3, 2.9, 6.3, 1.8],
|
114
|
+
[6.7, 2.5, 5.8, 1.8],
|
115
|
+
[7.2, 3.6, 6.1, 2.5],
|
116
|
+
[6.5, 3.2, 5.1, 2.0],
|
117
|
+
[6.4, 2.7, 5.3, 1.9],
|
118
|
+
[6.8, 3.0, 5.5, 2.1],
|
119
|
+
[5.7, 2.5, 5.0, 2.0],
|
120
|
+
[5.8, 2.8, 5.1, 2.4],
|
121
|
+
[6.4, 3.2, 5.3, 2.3],
|
122
|
+
[6.5, 3.0, 5.5, 1.8],
|
123
|
+
[7.7, 3.8, 6.7, 2.2],
|
124
|
+
[7.7, 2.6, 6.9, 2.3],
|
125
|
+
[6.0, 2.2, 5.0, 1.5],
|
126
|
+
[6.9, 3.2, 5.7, 2.3],
|
127
|
+
[5.6, 2.8, 4.9, 2.0],
|
128
|
+
[7.7, 2.8, 6.7, 2.0],
|
129
|
+
[6.3, 2.7, 4.9, 1.8],
|
130
|
+
[6.7, 3.3, 5.7, 2.1],
|
131
|
+
[7.2, 3.2, 6.0, 1.8],
|
132
|
+
[6.2, 2.8, 4.8, 1.8],
|
133
|
+
[6.1, 3.0, 4.9, 1.8],
|
134
|
+
[6.4, 2.8, 5.6, 2.1],
|
135
|
+
[7.2, 3.0, 5.8, 1.6],
|
136
|
+
[7.4, 2.8, 6.1, 1.9],
|
137
|
+
[7.9, 3.8, 6.4, 2.0],
|
138
|
+
[6.4, 2.8, 5.6, 2.2],
|
139
|
+
[6.3, 2.8, 5.1, 1.5],
|
140
|
+
[6.1, 2.6, 5.6, 1.4],
|
141
|
+
[7.7, 3.0, 6.1, 2.3],
|
142
|
+
[6.3, 3.4, 5.6, 2.4],
|
143
|
+
[6.4, 3.1, 5.5, 1.8],
|
144
|
+
[6.0, 3.0, 4.8, 1.8],
|
145
|
+
[6.9, 3.1, 5.4, 2.1],
|
146
|
+
[6.7, 3.1, 5.6, 2.4],
|
147
|
+
[6.9, 3.1, 5.1, 2.3],
|
148
|
+
[5.8, 2.7, 5.1, 1.9],
|
149
|
+
[6.8, 3.2, 5.9, 2.3],
|
150
|
+
[6.7, 3.3, 5.7, 2.5],
|
151
|
+
[6.7, 3.0, 5.2, 2.3],
|
152
|
+
[6.3, 2.5, 5.0, 1.9],
|
153
|
+
[6.5, 3.0, 5.2, 2.0],
|
154
|
+
[6.2, 3.4, 5.4, 2.3],
|
155
|
+
[5.9, 3.0, 5.1, 1.8],
|
156
|
+
]
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# this example shows the impact of the size of a som on the global error distance
|
2
|
+
require File.dirname(__FILE__) + '/../../lib/ai4r/som/som'
|
3
|
+
require File.dirname(__FILE__) + '/som_data'
|
4
|
+
require 'benchmark'
|
5
|
+
|
6
|
+
10.times do |t|
|
7
|
+
t += 3 # minimum number of nodes
|
8
|
+
|
9
|
+
puts "Nodes: #{t}"
|
10
|
+
som = Ai4r::Som::Som.new 4, 8, Ai4r::Som::TwoPhaseLayer.new(t)
|
11
|
+
som.initiate_map
|
12
|
+
|
13
|
+
puts "global error distance: #{som.global_error(SOM_DATA)}"
|
14
|
+
puts "\ntraining the som\n"
|
15
|
+
|
16
|
+
times = Benchmark.measure do
|
17
|
+
som.train SOM_DATA
|
18
|
+
end
|
19
|
+
|
20
|
+
puts "Elapsed time for training: #{times}"
|
21
|
+
puts "global error distance: #{som.global_error(SOM_DATA)}\n\n"
|
22
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../../lib/ai4r/som/som'
|
2
|
+
require File.dirname(__FILE__) + '/som_data'
|
3
|
+
require 'benchmark'
|
4
|
+
|
5
|
+
som = Ai4r::Som::Som.new 4, 8, Ai4r::Som::TwoPhaseLayer.new(10)
|
6
|
+
som.initiate_map
|
7
|
+
|
8
|
+
som.nodes.each do |node|
|
9
|
+
p node.weights
|
10
|
+
end
|
11
|
+
|
12
|
+
puts "global error distance: #{som.global_error(SOM_DATA)}"
|
13
|
+
puts "\ntraining the som\n"
|
14
|
+
|
15
|
+
times = Benchmark.measure do
|
16
|
+
som.train SOM_DATA
|
17
|
+
end
|
18
|
+
|
19
|
+
som.nodes.each do |node|
|
20
|
+
p node.weights
|
21
|
+
end
|
22
|
+
|
23
|
+
puts "Elapsed time for training: #{times}"
|
24
|
+
puts "global error distance: #{som.global_error(SOM_DATA)}\n\n"
|
data/lib/ai4r.rb
CHANGED
@@ -1,29 +1,31 @@
|
|
1
1
|
# Data
|
2
|
-
require "ai4r/data/data_set"
|
3
|
-
require "ai4r/data/statistics"
|
4
|
-
require "ai4r/data/proximity"
|
5
|
-
require "ai4r/data/parameterizable"
|
2
|
+
require File.dirname(__FILE__) + "/ai4r/data/data_set"
|
3
|
+
require File.dirname(__FILE__) + "/ai4r/data/statistics"
|
4
|
+
require File.dirname(__FILE__) + "/ai4r/data/proximity"
|
5
|
+
require File.dirname(__FILE__) + "/ai4r/data/parameterizable"
|
6
6
|
# Clusterers
|
7
|
-
require "ai4r/clusterers/clusterer"
|
8
|
-
require "ai4r/clusterers/k_means"
|
9
|
-
require "ai4r/clusterers/bisecting_k_means"
|
10
|
-
require "ai4r/clusterers/single_linkage"
|
11
|
-
require "ai4r/clusterers/complete_linkage"
|
12
|
-
require "ai4r/clusterers/average_linkage"
|
13
|
-
require "ai4r/clusterers/weighted_average_linkage"
|
14
|
-
require "ai4r/clusterers/centroid_linkage"
|
15
|
-
require "ai4r/clusterers/median_linkage"
|
16
|
-
require "ai4r/clusterers/ward_linkage"
|
17
|
-
require "ai4r/clusterers/diana"
|
7
|
+
require File.dirname(__FILE__) + "/ai4r/clusterers/clusterer"
|
8
|
+
require File.dirname(__FILE__) + "/ai4r/clusterers/k_means"
|
9
|
+
require File.dirname(__FILE__) + "/ai4r/clusterers/bisecting_k_means"
|
10
|
+
require File.dirname(__FILE__) + "/ai4r/clusterers/single_linkage"
|
11
|
+
require File.dirname(__FILE__) + "/ai4r/clusterers/complete_linkage"
|
12
|
+
require File.dirname(__FILE__) + "/ai4r/clusterers/average_linkage"
|
13
|
+
require File.dirname(__FILE__) + "/ai4r/clusterers/weighted_average_linkage"
|
14
|
+
require File.dirname(__FILE__) + "/ai4r/clusterers/centroid_linkage"
|
15
|
+
require File.dirname(__FILE__) + "/ai4r/clusterers/median_linkage"
|
16
|
+
require File.dirname(__FILE__) + "/ai4r/clusterers/ward_linkage"
|
17
|
+
require File.dirname(__FILE__) + "/ai4r/clusterers/diana"
|
18
18
|
# Classifiers
|
19
|
-
require "ai4r/classifiers/classifier"
|
20
|
-
require "ai4r/classifiers/id3"
|
21
|
-
require "ai4r/classifiers/prism"
|
22
|
-
require "ai4r/classifiers/one_r"
|
23
|
-
require "ai4r/classifiers/zero_r"
|
24
|
-
require "ai4r/classifiers/hyperpipes"
|
19
|
+
require File.dirname(__FILE__) + "/ai4r/classifiers/classifier"
|
20
|
+
require File.dirname(__FILE__) + "/ai4r/classifiers/id3"
|
21
|
+
require File.dirname(__FILE__) + "/ai4r/classifiers/prism"
|
22
|
+
require File.dirname(__FILE__) + "/ai4r/classifiers/one_r"
|
23
|
+
require File.dirname(__FILE__) + "/ai4r/classifiers/zero_r"
|
24
|
+
require File.dirname(__FILE__) + "/ai4r/classifiers/hyperpipes"
|
25
25
|
# Neural networks
|
26
|
-
require "ai4r/neural_network/backpropagation"
|
27
|
-
require "ai4r/neural_network/hopfield"
|
26
|
+
require File.dirname(__FILE__) + "/ai4r/neural_network/backpropagation"
|
27
|
+
require File.dirname(__FILE__) + "/ai4r/neural_network/hopfield"
|
28
28
|
# Genetic Algorithms
|
29
|
-
require "ai4r/genetic_algorithm/genetic_algorithm"
|
29
|
+
require File.dirname(__FILE__) + "/ai4r/genetic_algorithm/genetic_algorithm"
|
30
|
+
# SOM
|
31
|
+
require File.dirname(__FILE__) + "/ai4r/som/som"
|
@@ -178,7 +178,7 @@ module Ai4r
|
|
178
178
|
last_token = @data[0]
|
179
179
|
cost = 0
|
180
180
|
@data[1..-1].each do |token|
|
181
|
-
cost += @@costs[last_token][token]
|
181
|
+
cost += @@costs.data_items[last_token][token]
|
182
182
|
last_token = token
|
183
183
|
end
|
184
184
|
@fitness = -1 * cost
|
@@ -220,7 +220,7 @@ module Ai4r
|
|
220
220
|
# In this case, we have implemented edge recombination, which is the
|
221
221
|
# most used reproduction algorithm for the Travelling salesman problem.
|
222
222
|
def self.reproduce(a, b)
|
223
|
-
data_size = @@costs[0].length
|
223
|
+
data_size = @@costs.data_items[0].length
|
224
224
|
available = []
|
225
225
|
0.upto(data_size-1) { |n| available << n }
|
226
226
|
token = a.data[0]
|
@@ -249,7 +249,7 @@ module Ai4r
|
|
249
249
|
# use some problem domain knowledge, to generate a
|
250
250
|
# (probably) better initial solution.
|
251
251
|
def self.seed
|
252
|
-
data_size = @@costs[0].length
|
252
|
+
data_size = @@costs.data_items[0].length
|
253
253
|
available = []
|
254
254
|
0.upto(data_size-1) { |n| available << n }
|
255
255
|
seed = []
|
@@ -0,0 +1,68 @@
|
|
1
|
+
# Author:: Thomas Kern
|
2
|
+
# License:: MPL 1.1
|
3
|
+
# Project:: ai4r
|
4
|
+
# Url:: http://ai4r.rubyforge.org/
|
5
|
+
#
|
6
|
+
# You can redistribute it and/or modify it under the terms of
|
7
|
+
# the Mozilla Public License version 1.1 as published by the
|
8
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
+
|
10
|
+
require File.dirname(__FILE__) + '/../data/parameterizable'
|
11
|
+
|
12
|
+
module Ai4r
|
13
|
+
|
14
|
+
module Som
|
15
|
+
|
16
|
+
# responsible for the implementation of the algorithm's decays
|
17
|
+
# currently has methods for the decay of the radius, influence and learning rate.
|
18
|
+
# Has only one phase, which ends after the number of epochs is passed by the Som-class.
|
19
|
+
#
|
20
|
+
# = Parameters
|
21
|
+
# * nodes => number of nodes in the SOM (nodes x nodes). Has to be the same number
|
22
|
+
# you pass to the SOM. Has to be an integer
|
23
|
+
# * radius => the initial radius for the neighborhood
|
24
|
+
# * epochs => number of epochs the algorithm runs, has to be an integer. By default it is set to 100
|
25
|
+
# * learning_rate => sets the initial learning rate
|
26
|
+
class Layer
|
27
|
+
|
28
|
+
include Ai4r::Data::Parameterizable
|
29
|
+
|
30
|
+
parameters_info :nodes => "number of nodes, has to be equal to the som",
|
31
|
+
:epochs => "number of epochs the algorithm has to run",
|
32
|
+
:radius => "sets the initial neighborhoud radius"
|
33
|
+
|
34
|
+
def initialize(nodes, radius, epochs = 100, learning_rate = 0.7)
|
35
|
+
raise("Too few nodes") if nodes < 3
|
36
|
+
|
37
|
+
@nodes = nodes
|
38
|
+
@epochs = epochs
|
39
|
+
@radius = radius
|
40
|
+
@time_for_epoch = @epochs / Math.log(nodes / 4.0)
|
41
|
+
@time_for_epoch = @epochs + 1.0 if @time_for_epoch < @epochs
|
42
|
+
|
43
|
+
@initial_learning_rate = learning_rate
|
44
|
+
end
|
45
|
+
|
46
|
+
# calculates the influence decay for a certain distance and the current radius
|
47
|
+
# of the epoch
|
48
|
+
def influence_decay(distance, radius)
|
49
|
+
Math.exp(- (distance.to_f**2 / 2.0 / radius.to_f**2))
|
50
|
+
end
|
51
|
+
|
52
|
+
# calculates the radius decay for the current epoch. Uses @time_for_epoch
|
53
|
+
# which has to be higher than the number of epochs, otherwise the decay will be - Infinity
|
54
|
+
def radius_decay(epoch)
|
55
|
+
(@radius * ( 1 - epoch/ @time_for_epoch)).round
|
56
|
+
end
|
57
|
+
|
58
|
+
# calculates the learning rate decay. uses @time_for_epoch again and same rule applies:
|
59
|
+
# @time_for_epoch has to be higher than the number of epochs, otherwise the decay will be - Infinity
|
60
|
+
def learning_rate_decay(epoch)
|
61
|
+
@initial_learning_rate * ( 1 - epoch / @time_for_epoch)
|
62
|
+
end
|
63
|
+
|
64
|
+
end
|
65
|
+
|
66
|
+
end
|
67
|
+
|
68
|
+
end
|
@@ -0,0 +1,96 @@
|
|
1
|
+
# Author:: Thomas Kern
|
2
|
+
# License:: MPL 1.1
|
3
|
+
# Project:: ai4r
|
4
|
+
# Url:: http://ai4r.rubyforge.org/
|
5
|
+
#
|
6
|
+
# You can redistribute it and/or modify it under the terms of
|
7
|
+
# the Mozilla Public License version 1.1 as published by the
|
8
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
+
|
10
|
+
require File.dirname(__FILE__) + '/../data/parameterizable'
|
11
|
+
require File.dirname(__FILE__) + '/layer'
|
12
|
+
|
13
|
+
module Ai4r
|
14
|
+
|
15
|
+
module Som
|
16
|
+
|
17
|
+
# this class is used for the individual node and will be instantiated (nodes * nodes) times
|
18
|
+
#
|
19
|
+
# = attributes
|
20
|
+
#
|
21
|
+
# * direct access to the x and y values is granted, those show the position of the node in
|
22
|
+
# the square map
|
23
|
+
# * id => is the unique and sequential ID of the node
|
24
|
+
# * weights => values of the current weights are stored in an array of dimension 'dimensions'.
|
25
|
+
# Weights are of type float
|
26
|
+
# * instantiated_weight => the values of the first instantiation of weights. these values are
|
27
|
+
# never changed
|
28
|
+
|
29
|
+
class Node
|
30
|
+
|
31
|
+
include Ai4r::Data::Parameterizable
|
32
|
+
|
33
|
+
parameters_info :weights => "holds the current weight",
|
34
|
+
:instantiated_weight => "holds the very first weight",
|
35
|
+
:x => "holds the row ID of the unit in the map",
|
36
|
+
:y => "holds the column ID of the unit in the map",
|
37
|
+
:id => "id of the node"
|
38
|
+
|
39
|
+
# creates an instance of Node and instantiates the weights
|
40
|
+
# the parameters are a unique, sequential ID and the number of nodes per row/column of the square map
|
41
|
+
# dimensions signals the dimension of the input vector
|
42
|
+
def self.create(id, total, dimensions)
|
43
|
+
n = Node.new
|
44
|
+
n.id = id
|
45
|
+
n.instantiate_weight dimensions
|
46
|
+
n.x = id % total
|
47
|
+
n.y = (id / total.to_f).to_i
|
48
|
+
n
|
49
|
+
end
|
50
|
+
|
51
|
+
# instantiates the weights to the dimension (of the input vector)
|
52
|
+
# for backup reasons, the instantiated weight is stored into @instantiated_weight as well
|
53
|
+
def instantiate_weight(dimensions)
|
54
|
+
@weights = Array.new dimensions
|
55
|
+
@instantiated_weight = Array.new dimensions
|
56
|
+
@weights.each_with_index do |weight, index|
|
57
|
+
@weights[index] = rand
|
58
|
+
@instantiated_weight[index] = @weights[index]
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
# returns the Euclidean distance between the current weights and the input
|
63
|
+
# the input is a vector/array of the same size as weights
|
64
|
+
# at the end, the square root is extracted from the sum of differences
|
65
|
+
def distance_to_input(input)
|
66
|
+
dist = 0
|
67
|
+
input.each_with_index do |i, index|
|
68
|
+
dist += (i - @weights[index]) ** 2
|
69
|
+
end
|
70
|
+
|
71
|
+
Math.sqrt(dist)
|
72
|
+
end
|
73
|
+
|
74
|
+
# returns the distance in square-form from the instance node to the passed node
|
75
|
+
# example:
|
76
|
+
# 2 2 2 2 2
|
77
|
+
# 2 1 1 1 2
|
78
|
+
# 2 1 0 1 2
|
79
|
+
# 2 1 1 1 2
|
80
|
+
# 2 2 2 2 2
|
81
|
+
# 0 being the current node
|
82
|
+
def distance_to_node(node)
|
83
|
+
max((self.x - node.x).abs, (self.y - node.y).abs)
|
84
|
+
end
|
85
|
+
|
86
|
+
private
|
87
|
+
|
88
|
+
def max(a, b)
|
89
|
+
a > b ? a : b
|
90
|
+
end
|
91
|
+
|
92
|
+
end
|
93
|
+
|
94
|
+
end
|
95
|
+
|
96
|
+
end
|
data/lib/ai4r/som/som.rb
ADDED
@@ -0,0 +1,155 @@
|
|
1
|
+
# Author:: Thomas Kern
|
2
|
+
# License:: MPL 1.1
|
3
|
+
# Project:: ai4r
|
4
|
+
# Url:: http://ai4r.rubyforge.org/
|
5
|
+
#
|
6
|
+
# You can redistribute it and/or modify it under the terms of
|
7
|
+
# the Mozilla Public License version 1.1 as published by the
|
8
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
+
|
10
|
+
require File.dirname(__FILE__) + '/../data/parameterizable'
|
11
|
+
require File.dirname(__FILE__) + '/layer'
|
12
|
+
require File.dirname(__FILE__) + '/two_phase_layer'
|
13
|
+
require File.dirname(__FILE__) + '/node'
|
14
|
+
|
15
|
+
module Ai4r
|
16
|
+
|
17
|
+
# A self-organizing map (SOM) or self-organizing feature map (SOFM) is a type
|
18
|
+
# of artificial neural network that is trained using unsupervised learning to
|
19
|
+
# produce a low-dimensional (typically two-dimensional), discretized
|
20
|
+
# representation of the input space of the training samples, called a map.
|
21
|
+
|
22
|
+
# for more have a look at http://en.wikipedia.org/wiki/Self-organizing_map
|
23
|
+
# an in-depth explanation is provided by Sandhya Samarasinghe in
|
24
|
+
# 'Neural Networks for Applied Sciences and Engineering'
|
25
|
+
|
26
|
+
module Som
|
27
|
+
|
28
|
+
# = Introduction
|
29
|
+
#
|
30
|
+
# This is an implementation of Kohonen Self-Organizing Maps
|
31
|
+
#
|
32
|
+
# = Features
|
33
|
+
#
|
34
|
+
# * Support for any network architecture (number of layers and neurons)
|
35
|
+
# * Configurable propagation function
|
36
|
+
# * Optional usage of bias
|
37
|
+
# * Configurable momentum
|
38
|
+
# * Configurable learning rate
|
39
|
+
# * Configurable initial weight function
|
40
|
+
# * 100% ruby code, no external dependency
|
41
|
+
#
|
42
|
+
# = Parameters
|
43
|
+
# * dim => dimension of the input vector
|
44
|
+
# * number_of_nodes => is the number of nodes per row/column (square som).
|
45
|
+
# * layer => instance of a layer-algorithm class
|
46
|
+
#
|
47
|
+
# = About the project
|
48
|
+
# Author:: Thomas Kern
|
49
|
+
# License:: MPL 1.1
|
50
|
+
# Url:: http://ai4r.rubyforge.org
|
51
|
+
|
52
|
+
class Som
|
53
|
+
|
54
|
+
include Ai4r::Data::Parameterizable
|
55
|
+
|
56
|
+
parameters_info :nodes => "sets the architecture of the map (nodes x nodes)",
|
57
|
+
:dimension => "sets the dimension of the input",
|
58
|
+
:layer => "instance of a layer, defines how the training algorithm works",
|
59
|
+
:epoch => "number of finished epochs"
|
60
|
+
|
61
|
+
def initialize(dim, number_of_nodes, layer)
|
62
|
+
@layer = layer
|
63
|
+
@dimension = dim
|
64
|
+
@number_of_nodes = number_of_nodes
|
65
|
+
@nodes = Array.new(number_of_nodes * number_of_nodes)
|
66
|
+
@epoch = 0
|
67
|
+
@cache = {}
|
68
|
+
end
|
69
|
+
|
70
|
+
# finds the best matching unit (bmu) of a certain input in all the @nodes
|
71
|
+
# returns an array of length 2 => [node, distance] (distance is of Euclidean type, not
|
72
|
+
# a neighborhood distance)
|
73
|
+
def find_bmu(input)
|
74
|
+
bmu = @nodes.first
|
75
|
+
dist = bmu.distance_to_input input
|
76
|
+
@nodes[1..-1].each do |node|
|
77
|
+
tmp_dist = node.distance_to_input(input)
|
78
|
+
if tmp_dist <= dist
|
79
|
+
dist = tmp_dist
|
80
|
+
bmu = node
|
81
|
+
end
|
82
|
+
end
|
83
|
+
[bmu, dist]
|
84
|
+
end
|
85
|
+
|
86
|
+
# adjusts all nodes within a certain radius to the bmu
|
87
|
+
def adjust_nodes(input, bmu, radius, learning_rate)
|
88
|
+
@nodes.each do |node|
|
89
|
+
dist = node.distance_to_node(bmu[0])
|
90
|
+
next unless dist < radius
|
91
|
+
|
92
|
+
influence = @layer.influence_decay dist, radius
|
93
|
+
node.weights.each_with_index do |weight, index|
|
94
|
+
node.weights[index] += influence * learning_rate * (input[index] - weight)
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
# main method for the som. trains the map with the passed data vector
|
100
|
+
# calls train_step as long as train_step returns false
|
101
|
+
def train(data)
|
102
|
+
while !train_step(data)
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
# calculates the global distance error for all data entries
|
107
|
+
def global_error(data)
|
108
|
+
data.inject(0) {|sum,entry| sum + find_bmu(entry)[1]**2 }
|
109
|
+
end
|
110
|
+
|
111
|
+
# trains the map with the data as long as the @epoch is smaller than the epoch-value of
|
112
|
+
# @layer
|
113
|
+
# returns true if @epoch is greater than or equal to the fixed epoch-value in @layer, otherwise false
|
114
|
+
# 1 is added to @epoch at each method call
|
115
|
+
# the radius and learning rate is decreased at each method call/epoch as well
|
116
|
+
def train_step(data)
|
117
|
+
return true if @epoch >= @layer.epochs
|
118
|
+
|
119
|
+
radius = @layer.radius_decay @epoch
|
120
|
+
learning_rate = @layer.learning_rate_decay @epoch
|
121
|
+
|
122
|
+
data.each do |entry|
|
123
|
+
adjust_nodes entry, find_bmu(entry), radius, learning_rate
|
124
|
+
end
|
125
|
+
|
126
|
+
@epoch += 1
|
127
|
+
false
|
128
|
+
end
|
129
|
+
|
130
|
+
# returns the node at position (x,y) in the square map
|
131
|
+
def get_node(x, y)
|
132
|
+
raise(Exception.new) if check_param_for_som(x,y)
|
133
|
+
@nodes[y + x * @number_of_nodes]
|
134
|
+
end
|
135
|
+
|
136
|
+
# initiates the map by creating (@number_of_nodes * @number_of_nodes) nodes
|
137
|
+
def initiate_map
|
138
|
+
@nodes.each_with_index do |node, i|
|
139
|
+
@nodes[i] = Node.create i, @number_of_nodes, @dimension
|
140
|
+
end
|
141
|
+
end
|
142
|
+
|
143
|
+
private
|
144
|
+
|
145
|
+
# checks whether or not there is a node in the map at the coordinates (x,y).
|
146
|
+
# x is the row, y the column indicator
|
147
|
+
def check_param_for_som(x, y)
|
148
|
+
y > @number_of_nodes - 1 || x > @number_of_nodes - 1 || x < 0 || y < 0
|
149
|
+
end
|
150
|
+
|
151
|
+
end
|
152
|
+
|
153
|
+
end
|
154
|
+
|
155
|
+
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
# Author:: Thomas Kern
|
2
|
+
# License:: MPL 1.1
|
3
|
+
# Project:: ai4r
|
4
|
+
# Url:: http://ai4r.rubyforge.org/
|
5
|
+
#
|
6
|
+
# You can redistribute it and/or modify it under the terms of
|
7
|
+
# the Mozilla Public License version 1.1 as published by the
|
8
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
+
|
10
|
+
require File.dirname(__FILE__) + '/../data/parameterizable'
|
11
|
+
require File.dirname(__FILE__) + '/layer'
|
12
|
+
|
13
|
+
module Ai4r
|
14
|
+
|
15
|
+
module Som
|
16
|
+
|
17
|
+
# responsible for the implementation of the algorithm's decays, extends the class Layer.
|
18
|
+
# currently overrides the radius and learning rate decay methods of Layer.
|
19
|
+
# Has two phases, phase one has a decay in both the learning rate and the radius. The number
|
20
|
+
# of epochs for both phases can be passed and the total number of epochs is the sum of epoch
|
21
|
+
# for phase one and phase two.
|
22
|
+
# In the second phase, the learning and radius decay is steady, normally set to a small number (i.e. 0.01)
|
23
|
+
#
|
24
|
+
# = Parameters
|
25
|
+
# * nodes => number of nodes in the SOM (nodes x nodes). Has to be the same number
|
26
|
+
# you pass to the SOM. Has to be an integer
|
27
|
+
# * radius => the initial radius for the neighborhood
|
28
|
+
# * phase_one => number of epochs for phase one, has to be an integer. By default it is set to 150
|
29
|
+
# * phase_two => number of epochs for phase two, has to be an integer. By default it is set to 100
|
30
|
+
# * learning_rate => sets the initial learning rate
|
31
|
+
# * phase_one_learning_rate => sets the learning rate for phase one
|
32
|
+
# * phase_two_learning_rate => sets the learning rate for phase two
|
33
|
+
|
34
|
+
class TwoPhaseLayer < Layer
|
35
|
+
|
36
|
+
def initialize(nodes, learning_rate = 0.9, phase_one = 150, phase_two = 100,
|
37
|
+
phase_one_learning_rate = 0.1, phase_two_learning_rate = 0)
|
38
|
+
super nodes, nodes, phase_one + phase_two, learning_rate
|
39
|
+
@phase_one = phase_one
|
40
|
+
@phase_two = phase_two
|
41
|
+
@lr = @initial_learning_rate
|
42
|
+
|
43
|
+
@phase_one_learning_rate = phase_one_learning_rate
|
44
|
+
@phase_two_learning_rate = phase_two_learning_rate
|
45
|
+
|
46
|
+
@radius_reduction = @phase_one / (nodes/2.0 - 1) + 1
|
47
|
+
@delta_lr = (@lr - @phase_one_learning_rate)/ @phase_one
|
48
|
+
@radius = (nodes / 2.0).to_i
|
49
|
+
end
|
50
|
+
|
51
|
+
# two different values will be returned, depending on the phase
|
52
|
+
# in phase one, the radius will be incrementally reduced by 1 every @radius_reduction epochs
|
53
|
+
# in phase two, the radius is fixed to 1
|
54
|
+
def radius_decay(epoch)
|
55
|
+
if epoch > @phase_one
|
56
|
+
return 1
|
57
|
+
else
|
58
|
+
if (epoch % @radius_reduction) == 0
|
59
|
+
@radius -= 1
|
60
|
+
end
|
61
|
+
@radius
|
62
|
+
end
|
63
|
+
|
64
|
+
end
|
65
|
+
|
66
|
+
# two different values will be returned, depending on the phase
|
67
|
+
# in phase one, the rate will be incrementally reduced every time this method is called
|
68
|
+
# on the switch of phases, the learning rate will be reset and the delta_lr (which signals
|
69
|
+
# the decay value of the learning rate) is reset as well
|
70
|
+
# in phase two, the newly reset delta_lr rate will be used to incrementally reduce the
|
71
|
+
# learning rate
|
72
|
+
def learning_rate_decay(epoch)
|
73
|
+
if epoch < @phase_one
|
74
|
+
@lr -= @delta_lr
|
75
|
+
return @lr
|
76
|
+
elsif epoch == @phase_one
|
77
|
+
@lr = @phase_one_learning_rate
|
78
|
+
@delta_lr = (@phase_one_learning_rate - @phase_two_learning_rate)/@phase_two
|
79
|
+
return @lr
|
80
|
+
else
|
81
|
+
@lr -= @delta_lr
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
end
|
86
|
+
|
87
|
+
end
|
88
|
+
|
89
|
+
end
|
90
|
+
|
@@ -82,14 +82,16 @@ SPLIT_DATA_ITEMS_BY_AGE = [ [
|
|
82
82
|
"elsif age_range=='>80' then marketing_target='Y'\n"+
|
83
83
|
"else raise 'There was not enough information during training to do a proper induction for this data element' end"
|
84
84
|
|
85
|
-
Ai4r::Classifiers::ID3.send(:public, *Ai4r::Classifiers::ID3.protected_instance_methods)
|
86
|
-
Ai4r::Classifiers::ID3.send(:public, *Ai4r::Classifiers::ID3.private_instance_methods)
|
87
|
-
|
88
85
|
include Ai4r::Classifiers
|
89
86
|
include Ai4r::Data
|
90
87
|
|
91
88
|
class ID3Test < Test::Unit::TestCase
|
92
|
-
|
89
|
+
|
90
|
+
def test_build
|
91
|
+
Ai4r::Classifiers::ID3.send(:public, *Ai4r::Classifiers::ID3.protected_instance_methods)
|
92
|
+
Ai4r::Classifiers::ID3.send(:public, *Ai4r::Classifiers::ID3.private_instance_methods)
|
93
|
+
end
|
94
|
+
|
93
95
|
def test_log2
|
94
96
|
assert_equal 1.0, ID3.log2(2)
|
95
97
|
assert_equal 0.0, ID3.log2(0)
|
@@ -1,14 +1,12 @@
|
|
1
1
|
require 'test/unit'
|
2
2
|
require File.dirname(__FILE__) + '/../../lib/ai4r/classifiers/prism'
|
3
3
|
|
4
|
-
Ai4r::Classifiers::Prism.send(:public, *Ai4r::Classifiers::Prism.protected_instance_methods)
|
5
|
-
Ai4r::Classifiers::Prism.send(:public, *Ai4r::Classifiers::Prism.private_instance_methods)
|
6
4
|
|
7
5
|
class PrismTest < Test::Unit::TestCase
|
8
6
|
|
9
7
|
include Ai4r::Classifiers
|
10
8
|
include Ai4r::Data
|
11
|
-
|
9
|
+
|
12
10
|
@@data_examples = [ ['New York', '<30', 'M', 'Y'],
|
13
11
|
['Chicago', '<30', 'M', 'Y'],
|
14
12
|
['Chicago', '<30', 'F', 'Y'],
|
@@ -42,6 +40,9 @@ class PrismTest < Test::Unit::TestCase
|
|
42
40
|
assert_equal("city", classifier.data_set.data_labels.first)
|
43
41
|
assert_equal("marketing_target", classifier.data_set.data_labels.last)
|
44
42
|
assert !classifier.rules.empty?
|
43
|
+
|
44
|
+
Prism.send(:public, *Prism.protected_instance_methods)
|
45
|
+
Prism.send(:public, *Prism.private_instance_methods)
|
45
46
|
end
|
46
47
|
|
47
48
|
def test_eval
|
@@ -76,6 +77,7 @@ class PrismTest < Test::Unit::TestCase
|
|
76
77
|
def test_matches_conditions
|
77
78
|
classifier = Prism.new.build(DataSet.new(:data_labels => @@data_labels,
|
78
79
|
:data_items => @@data_examples))
|
80
|
+
|
79
81
|
assert classifier.matches_conditions(['New York', '<30', 'M', 'Y'], {"age_range" => "<30"})
|
80
82
|
assert !classifier.matches_conditions(['New York', '<30', 'M', 'Y'], {"age_range" => "[50-80]"})
|
81
83
|
end
|
@@ -12,8 +12,6 @@ require File.dirname(__FILE__) + '/../../lib/ai4r/clusterers/average_linkage'
|
|
12
12
|
|
13
13
|
class Ai4r::Clusterers::AverageLinkage < Ai4r::Clusterers::SingleLinkage
|
14
14
|
attr_accessor :data_set, :number_of_clusters, :clusters, :distance_matrix
|
15
|
-
public :linkage_distance
|
16
|
-
public :distance_between_item_and_cluster
|
17
15
|
end
|
18
16
|
|
19
17
|
class AverageLinkageTest < Test::Unit::TestCase
|
@@ -36,6 +34,11 @@ class AverageLinkageTest < Test::Unit::TestCase
|
|
36
34
|
[68.0, 26.0, 9.0, 0.0, 10.0, 68.0, 5.0, 52.0, 16.0],
|
37
35
|
[49.0, 49.0, 26.0, 5.0, 25.0, 49.0, 4.0, 29.0, 37.0, 5.0],
|
38
36
|
[2.0, 72.0, 65.0, 50.0, 52.0, 2.0, 65.0, 10.0, 74.0, 50.0, 37.0]]
|
37
|
+
|
38
|
+
def setup
|
39
|
+
Ai4r::Clusterers::AverageLinkage.send(:public,
|
40
|
+
*Ai4r::Clusterers::AverageLinkage.protected_instance_methods)
|
41
|
+
end
|
39
42
|
|
40
43
|
def test_linkage_distance
|
41
44
|
clusterer = Ai4r::Clusterers::AverageLinkage.new
|
@@ -12,8 +12,6 @@ require File.dirname(__FILE__) + '/../../lib/ai4r/clusterers/centroid_linkage'
|
|
12
12
|
|
13
13
|
class Ai4r::Clusterers::CentroidLinkage
|
14
14
|
attr_accessor :data_set, :number_of_clusters, :clusters, :distance_matrix, :index_clusters
|
15
|
-
public :linkage_distance
|
16
|
-
public :create_initial_index_clusters
|
17
15
|
end
|
18
16
|
|
19
17
|
class Ai4r::Clusterers::CentroidLinkageTest < Test::Unit::TestCase
|
@@ -37,6 +35,11 @@ class Ai4r::Clusterers::CentroidLinkageTest < Test::Unit::TestCase
|
|
37
35
|
[49.0, 49.0, 26.0, 5.0, 25.0, 49.0, 4.0, 29.0, 37.0, 5.0],
|
38
36
|
[2.0, 72.0, 65.0, 50.0, 52.0, 2.0, 65.0, 10.0, 74.0, 50.0, 37.0]]
|
39
37
|
|
38
|
+
def setup
|
39
|
+
Ai4r::Clusterers::CentroidLinkage.send(:public,
|
40
|
+
*Ai4r::Clusterers::CentroidLinkage.protected_instance_methods)
|
41
|
+
end
|
42
|
+
|
40
43
|
def test_linkage_distance
|
41
44
|
clusterer = Ai4r::Clusterers::CentroidLinkage.new
|
42
45
|
clusterer.data_set = DataSet.new :data_items => @@data
|
@@ -12,8 +12,6 @@ require File.dirname(__FILE__) + '/../../lib/ai4r/clusterers/complete_linkage'
|
|
12
12
|
|
13
13
|
class Ai4r::Clusterers::CompleteLinkage
|
14
14
|
attr_accessor :data_set, :number_of_clusters, :clusters, :distance_matrix
|
15
|
-
public :linkage_distance
|
16
|
-
public :distance_between_item_and_cluster
|
17
15
|
end
|
18
16
|
|
19
17
|
class CompleteLinkageTest < Test::Unit::TestCase
|
@@ -36,6 +34,11 @@ class CompleteLinkageTest < Test::Unit::TestCase
|
|
36
34
|
[68.0, 26.0, 9.0, 0.0, 10.0, 68.0, 5.0, 52.0, 16.0],
|
37
35
|
[49.0, 49.0, 26.0, 5.0, 25.0, 49.0, 4.0, 29.0, 37.0, 5.0],
|
38
36
|
[2.0, 72.0, 65.0, 50.0, 52.0, 2.0, 65.0, 10.0, 74.0, 50.0, 37.0]]
|
37
|
+
|
38
|
+
def setup
|
39
|
+
Ai4r::Clusterers::CompleteLinkage.send(:public,
|
40
|
+
*Ai4r::Clusterers::CompleteLinkage.protected_instance_methods)
|
41
|
+
end
|
39
42
|
|
40
43
|
def test_linkage_distance
|
41
44
|
clusterer = Ai4r::Clusterers::CompleteLinkage.new
|
@@ -12,8 +12,6 @@ require File.dirname(__FILE__) + '/../../lib/ai4r/clusterers/median_linkage'
|
|
12
12
|
|
13
13
|
class Ai4r::Clusterers::MedianLinkage
|
14
14
|
attr_accessor :data_set, :number_of_clusters, :clusters, :distance_matrix, :index_clusters
|
15
|
-
public :linkage_distance
|
16
|
-
public :create_initial_index_clusters
|
17
15
|
end
|
18
16
|
|
19
17
|
class Ai4r::Clusterers::MedianLinkageTest < Test::Unit::TestCase
|
@@ -36,6 +34,11 @@ class Ai4r::Clusterers::MedianLinkageTest < Test::Unit::TestCase
|
|
36
34
|
[68.0, 26.0, 9.0, 0.0, 10.0, 68.0, 5.0, 52.0, 16.0],
|
37
35
|
[49.0, 49.0, 26.0, 5.0, 25.0, 49.0, 4.0, 29.0, 37.0, 5.0],
|
38
36
|
[2.0, 72.0, 65.0, 50.0, 52.0, 2.0, 65.0, 10.0, 74.0, 50.0, 37.0]]
|
37
|
+
|
38
|
+
def setup
|
39
|
+
Ai4r::Clusterers::MedianLinkage.send(:public,
|
40
|
+
*Ai4r::Clusterers::MedianLinkage.protected_instance_methods)
|
41
|
+
end
|
39
42
|
|
40
43
|
def test_linkage_distance
|
41
44
|
clusterer = Ai4r::Clusterers::MedianLinkage.new
|
@@ -12,8 +12,6 @@ require File.dirname(__FILE__) + '/../../lib/ai4r/clusterers/ward_linkage'
|
|
12
12
|
|
13
13
|
class Ai4r::Clusterers::WardLinkage
|
14
14
|
attr_accessor :data_set, :number_of_clusters, :clusters, :distance_matrix, :index_clusters
|
15
|
-
public :linkage_distance
|
16
|
-
public :create_initial_index_clusters
|
17
15
|
end
|
18
16
|
|
19
17
|
class Ai4r::Clusterers::WardLinkageTest < Test::Unit::TestCase
|
@@ -36,7 +34,12 @@ class Ai4r::Clusterers::WardLinkageTest < Test::Unit::TestCase
|
|
36
34
|
[68.0, 26.0, 9.0, 0.0, 10.0, 68.0, 5.0, 52.0, 16.0],
|
37
35
|
[49.0, 49.0, 26.0, 5.0, 25.0, 49.0, 4.0, 29.0, 37.0, 5.0],
|
38
36
|
[2.0, 72.0, 65.0, 50.0, 52.0, 2.0, 65.0, 10.0, 74.0, 50.0, 37.0]]
|
39
|
-
|
37
|
+
|
38
|
+
def setup
|
39
|
+
Ai4r::Clusterers::WardLinkage.send(:public,
|
40
|
+
*Ai4r::Clusterers::WardLinkage.protected_instance_methods)
|
41
|
+
end
|
42
|
+
|
40
43
|
def test_linkage_distance
|
41
44
|
clusterer = Ai4r::Clusterers::WardLinkage.new
|
42
45
|
clusterer.data_set = DataSet.new :data_items => @@data
|
@@ -12,8 +12,6 @@ require File.dirname(__FILE__) + '/../../lib/ai4r/clusterers/weighted_average_li
|
|
12
12
|
|
13
13
|
class Ai4r::Clusterers::WeightedAverageLinkage
|
14
14
|
attr_accessor :data_set, :number_of_clusters, :clusters, :distance_matrix, :index_clusters
|
15
|
-
public :linkage_distance
|
16
|
-
public :create_initial_index_clusters
|
17
15
|
end
|
18
16
|
|
19
17
|
class Ai4r::Clusterers::WeightedAverageLinkageTest < Test::Unit::TestCase
|
@@ -36,6 +34,11 @@ class Ai4r::Clusterers::WeightedAverageLinkageTest < Test::Unit::TestCase
|
|
36
34
|
[68.0, 26.0, 9.0, 0.0, 10.0, 68.0, 5.0, 52.0, 16.0],
|
37
35
|
[49.0, 49.0, 26.0, 5.0, 25.0, 49.0, 4.0, 29.0, 37.0, 5.0],
|
38
36
|
[2.0, 72.0, 65.0, 50.0, 52.0, 2.0, 65.0, 10.0, 74.0, 50.0, 37.0]]
|
37
|
+
|
38
|
+
def setup
|
39
|
+
Ai4r::Clusterers::WeightedAverageLinkage.send(:public,
|
40
|
+
*Ai4r::Clusterers::WeightedAverageLinkage.protected_instance_methods)
|
41
|
+
end
|
39
42
|
|
40
43
|
def test_linkage_distance
|
41
44
|
clusterer = Ai4r::Clusterers::WeightedAverageLinkage.new
|
@@ -0,0 +1,97 @@
|
|
1
|
+
# This is a unit test file for the SOM algorithm implemented
|
2
|
+
# in ai4r
|
3
|
+
#
|
4
|
+
# Author:: Thomas Kern
|
5
|
+
# License:: MPL 1.1
|
6
|
+
# Project:: ai4r
|
7
|
+
# Url:: http://ai4r.rubyforge.org/
|
8
|
+
#
|
9
|
+
# You can redistribute it and/or modify it under the terms of
|
10
|
+
# the Mozilla Public License version 1.1 as published by the
|
11
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
12
|
+
|
13
|
+
require File.dirname(__FILE__) + '/../../lib/ai4r/som/som'
|
14
|
+
require 'test/unit'
|
15
|
+
|
16
|
+
module Ai4r
|
17
|
+
|
18
|
+
module Som
|
19
|
+
|
20
|
+
class SomTest < Test::Unit::TestCase
|
21
|
+
|
22
|
+
def setup
|
23
|
+
@som = Som.new 2, 5, Layer.new(3, 3)
|
24
|
+
@som.initiate_map
|
25
|
+
end
|
26
|
+
|
27
|
+
def test_random_initiation
|
28
|
+
assert_equal 25, @som.nodes.length
|
29
|
+
|
30
|
+
@som.nodes.each do |node|
|
31
|
+
assert_equal 2, node.weights.length
|
32
|
+
|
33
|
+
node.weights.each do |weight|
|
34
|
+
assert weight < 1
|
35
|
+
assert weight > 0
|
36
|
+
end
|
37
|
+
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
|
42
|
+
# bmu
|
43
|
+
|
44
|
+
def test_find_bmu
|
45
|
+
bmu = @som.find_bmu([0.5, 0.5])
|
46
|
+
end
|
47
|
+
|
48
|
+
def test_adjust_nodes
|
49
|
+
@som.adjust_nodes [1, 2], @som.find_bmu([0.5, 0.5]), 2, 0.1
|
50
|
+
end
|
51
|
+
|
52
|
+
def test_access_to_nodes
|
53
|
+
assert_raise Exception do
|
54
|
+
@som.get_node(5, 5)
|
55
|
+
end
|
56
|
+
|
57
|
+
assert_raise Exception do
|
58
|
+
@som.get_node(5, -3)
|
59
|
+
end
|
60
|
+
|
61
|
+
assert_equal Node, @som.get_node(0, 0).class
|
62
|
+
end
|
63
|
+
|
64
|
+
def test_distance_for_same_row
|
65
|
+
assert_equal 2, distancer(0, 0, 0, 2)
|
66
|
+
assert_equal 2, distancer(0, 4, 0, 2)
|
67
|
+
assert_equal 0, distancer(0, 0, 0, 0)
|
68
|
+
end
|
69
|
+
|
70
|
+
def test_distance_for_same_column
|
71
|
+
assert_equal 1, distancer(0, 0, 1, 0)
|
72
|
+
assert_equal 2, distancer(2, 0, 0, 0)
|
73
|
+
end
|
74
|
+
|
75
|
+
def test_distance_for_diagonally_point
|
76
|
+
assert_equal 1, distancer(1, 0, 0, 1)
|
77
|
+
assert_equal 2, distancer(2, 2, 0, 0)
|
78
|
+
assert_equal 2, distancer(3, 2, 1, 4)
|
79
|
+
end
|
80
|
+
|
81
|
+
def test_distance_for_screwed_diagonally_point
|
82
|
+
assert_equal 2, distancer(0, 0, 2, 1)
|
83
|
+
assert_equal 4, distancer(3, 4, 1, 0)
|
84
|
+
assert_equal 2, distancer(3, 2, 1, 3)
|
85
|
+
end
|
86
|
+
|
87
|
+
private
|
88
|
+
|
89
|
+
def distancer(x1, y1, x2, y2)
|
90
|
+
@som.get_node(x1, y1).distance_to_node(@som.get_node(x2, y2))
|
91
|
+
end
|
92
|
+
|
93
|
+
end
|
94
|
+
|
95
|
+
end
|
96
|
+
|
97
|
+
end
|
metadata
CHANGED
@@ -1,125 +1,127 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.9.4
|
3
|
+
specification_version: 1
|
2
4
|
name: ai4r
|
3
5
|
version: !ruby/object:Gem::Version
|
4
|
-
version: "1.7"
|
6
|
+
version: "1.8"
|
7
|
+
date: 2009-06-15 00:00:00 +01:00
|
8
|
+
summary: Ruby implementations of algorithms covering several Artificial intelligence fields, including Genetic algorithms, Neural Networks, machine learning, and clustering.
|
9
|
+
require_paths:
|
10
|
+
- lib
|
11
|
+
email: sergio@jadeferret.com
|
12
|
+
homepage: http://ai4r.rubyforge.org
|
13
|
+
rubyforge_project: ai4r
|
14
|
+
description:
|
15
|
+
autorequire:
|
16
|
+
default_executable:
|
17
|
+
bindir: bin
|
18
|
+
has_rdoc: true
|
19
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">"
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.0.0
|
24
|
+
version:
|
5
25
|
platform: ruby
|
26
|
+
signing_key:
|
27
|
+
cert_chain:
|
28
|
+
post_install_message:
|
6
29
|
authors:
|
7
30
|
- Sergio Fierens
|
8
|
-
autorequire:
|
9
|
-
bindir: bin
|
10
|
-
cert_chain: []
|
11
|
-
|
12
|
-
date: 2009-04-29 00:00:00 +01:00
|
13
|
-
default_executable:
|
14
|
-
dependencies: []
|
15
|
-
|
16
|
-
description:
|
17
|
-
email: sergio@jadeferret.com
|
18
|
-
executables: []
|
19
|
-
|
20
|
-
extensions: []
|
21
|
-
|
22
|
-
extra_rdoc_files:
|
23
|
-
- README.rdoc
|
24
31
|
files:
|
25
32
|
- examples/clusterers
|
26
|
-
- examples/clusterers/simple_website_clustering.rb
|
27
|
-
- examples/neural_network
|
28
|
-
- examples/neural_network/backpropagation_example.rb
|
29
|
-
- examples/neural_network/patterns_with_base_noise.rb
|
30
|
-
- examples/neural_network/xor_example.rb
|
31
|
-
- examples/neural_network/patterns_with_noise.rb
|
32
|
-
- examples/neural_network/training_patterns.rb
|
33
33
|
- examples/decision_trees
|
34
34
|
- examples/decision_trees/data_set.csv
|
35
|
-
- examples/decision_trees/results.txt
|
36
35
|
- examples/decision_trees/id3_example.rb
|
36
|
+
- examples/decision_trees/results.txt
|
37
37
|
- examples/genetic_algorithm
|
38
38
|
- examples/genetic_algorithm/genetic_algorithm_example.rb
|
39
39
|
- examples/genetic_algorithm/travel_cost.csv
|
40
|
-
-
|
40
|
+
- examples/neural_network
|
41
|
+
- examples/neural_network/backpropagation_example.rb
|
42
|
+
- examples/neural_network/patterns_with_base_noise.rb
|
43
|
+
- examples/neural_network/patterns_with_noise.rb
|
44
|
+
- examples/neural_network/training_patterns.rb
|
45
|
+
- examples/neural_network/xor_example.rb
|
46
|
+
- examples/som
|
47
|
+
- examples/som/som_data.rb
|
48
|
+
- examples/som/som_multi_node_example.rb
|
49
|
+
- examples/som/som_single_example.rb
|
41
50
|
- lib/ai4r
|
51
|
+
- lib/ai4r/classifiers
|
52
|
+
- lib/ai4r/classifiers/classifier.rb
|
53
|
+
- lib/ai4r/classifiers/hyperpipes.rb
|
54
|
+
- lib/ai4r/classifiers/id3.rb
|
55
|
+
- lib/ai4r/classifiers/multilayer_perceptron.rb
|
56
|
+
- lib/ai4r/classifiers/one_r.rb
|
57
|
+
- lib/ai4r/classifiers/prism.rb
|
58
|
+
- lib/ai4r/classifiers/zero_r.rb
|
42
59
|
- lib/ai4r/clusterers
|
43
60
|
- lib/ai4r/clusterers/average_linkage.rb
|
44
|
-
- lib/ai4r/clusterers/
|
61
|
+
- lib/ai4r/clusterers/bisecting_k_means.rb
|
45
62
|
- lib/ai4r/clusterers/centroid_linkage.rb
|
46
|
-
- lib/ai4r/clusterers/
|
63
|
+
- lib/ai4r/clusterers/clusterer.rb
|
47
64
|
- lib/ai4r/clusterers/complete_linkage.rb
|
48
65
|
- lib/ai4r/clusterers/diana.rb
|
49
|
-
- lib/ai4r/clusterers/bisecting_k_means.rb
|
50
|
-
- lib/ai4r/clusterers/ward_linkage.rb
|
51
|
-
- lib/ai4r/clusterers/single_linkage.rb
|
52
66
|
- lib/ai4r/clusterers/k_means.rb
|
53
|
-
- lib/ai4r/clusterers/
|
67
|
+
- lib/ai4r/clusterers/median_linkage.rb
|
68
|
+
- lib/ai4r/clusterers/single_linkage.rb
|
69
|
+
- lib/ai4r/clusterers/ward_linkage.rb
|
70
|
+
- lib/ai4r/clusterers/weighted_average_linkage.rb
|
71
|
+
- lib/ai4r/data
|
72
|
+
- lib/ai4r/data/data_set.rb
|
73
|
+
- lib/ai4r/data/parameterizable.rb
|
74
|
+
- lib/ai4r/data/proximity.rb
|
75
|
+
- lib/ai4r/data/statistics.rb
|
54
76
|
- lib/ai4r/experiment
|
55
77
|
- lib/ai4r/experiment/classifier_evaluator.rb
|
78
|
+
- lib/ai4r/genetic_algorithm
|
79
|
+
- lib/ai4r/genetic_algorithm/genetic_algorithm.rb
|
56
80
|
- lib/ai4r/neural_network
|
57
81
|
- lib/ai4r/neural_network/backpropagation.rb
|
58
82
|
- lib/ai4r/neural_network/hopfield.rb
|
59
|
-
- lib/ai4r/
|
60
|
-
- lib/ai4r/
|
61
|
-
- lib/ai4r/
|
62
|
-
- lib/ai4r/
|
63
|
-
- lib/ai4r/
|
64
|
-
- lib/ai4r
|
65
|
-
- lib/ai4r/classifiers/classifier.rb
|
66
|
-
- lib/ai4r/classifiers/id3.rb
|
67
|
-
- lib/ai4r/genetic_algorithm
|
68
|
-
- lib/ai4r/genetic_algorithm/genetic_algorithm.rb
|
69
|
-
- lib/ai4r/data
|
70
|
-
- lib/ai4r/data/parameterizable.rb
|
71
|
-
- lib/ai4r/data/statistics.rb
|
72
|
-
- lib/ai4r/data/data_set.rb
|
73
|
-
- lib/ai4r/data/proximity.rb
|
83
|
+
- lib/ai4r/som
|
84
|
+
- lib/ai4r/som/layer.rb
|
85
|
+
- lib/ai4r/som/node.rb
|
86
|
+
- lib/ai4r/som/som.rb
|
87
|
+
- lib/ai4r/som/two_phase_layer.rb
|
88
|
+
- lib/ai4r.rb
|
74
89
|
- README.rdoc
|
75
|
-
has_rdoc: true
|
76
|
-
homepage: http://ai4r.rubyforge.org
|
77
|
-
post_install_message:
|
78
|
-
rdoc_options: []
|
79
|
-
|
80
|
-
require_paths:
|
81
|
-
- lib
|
82
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
83
|
-
requirements:
|
84
|
-
- - ">="
|
85
|
-
- !ruby/object:Gem::Version
|
86
|
-
version: "0"
|
87
|
-
version:
|
88
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
89
|
-
requirements:
|
90
|
-
- - ">="
|
91
|
-
- !ruby/object:Gem::Version
|
92
|
-
version: "0"
|
93
|
-
version:
|
94
|
-
requirements: []
|
95
|
-
|
96
|
-
rubyforge_project: ai4r
|
97
|
-
rubygems_version: 1.3.1
|
98
|
-
signing_key:
|
99
|
-
specification_version: 2
|
100
|
-
summary: Ruby implementations of algorithms covering several Artificial intelligence fields, including Genetic algorithms, Neural Networks, machine learning, and clustering.
|
101
90
|
test_files:
|
102
|
-
- test/
|
103
|
-
- test/
|
104
|
-
- test/
|
91
|
+
- test/classifiers/hyperpipes_test.rb
|
92
|
+
- test/classifiers/id3_test.rb
|
93
|
+
- test/classifiers/multilayer_perceptron_test.rb
|
94
|
+
- test/classifiers/one_r_test.rb
|
95
|
+
- test/classifiers/prism_test.rb
|
96
|
+
- test/classifiers/zero_r_test.rb
|
105
97
|
- test/clusterers/average_linkage_test.rb
|
106
|
-
- test/clusterers/
|
107
|
-
- test/clusterers/ward_linkage_test.rb
|
108
|
-
- test/clusterers/complete_linkage_test.rb
|
98
|
+
- test/clusterers/bisecting_k_means_test.rb
|
109
99
|
- test/clusterers/centroid_linkage_test.rb
|
100
|
+
- test/clusterers/complete_linkage_test.rb
|
101
|
+
- test/clusterers/diana_test.rb
|
110
102
|
- test/clusterers/k_means_test.rb
|
111
|
-
- test/clusterers/
|
103
|
+
- test/clusterers/median_linkage_test.rb
|
104
|
+
- test/clusterers/single_linkage_test.rb
|
105
|
+
- test/clusterers/ward_linkage_test.rb
|
106
|
+
- test/clusterers/weighted_average_linkage_test.rb
|
107
|
+
- test/data/data_set_test.rb
|
108
|
+
- test/data/proximity_test.rb
|
109
|
+
- test/data/statistics_test.rb
|
112
110
|
- test/experiment/classifier_evaluator_test.rb
|
113
|
-
- test/neural_network/hopfield_test.rb
|
114
|
-
- test/neural_network/backpropagation_test.rb
|
115
|
-
- test/classifiers/zero_r_test.rb
|
116
|
-
- test/classifiers/multilayer_perceptron_test.rb
|
117
|
-
- test/classifiers/prism_test.rb
|
118
|
-
- test/classifiers/one_r_test.rb
|
119
|
-
- test/classifiers/hyperpipes_test.rb
|
120
|
-
- test/classifiers/id3_test.rb
|
121
|
-
- test/genetic_algorithm/genetic_algorithm_test.rb
|
122
111
|
- test/genetic_algorithm/chromosome_test.rb
|
123
|
-
- test/
|
124
|
-
- test/
|
125
|
-
- test/
|
112
|
+
- test/genetic_algorithm/genetic_algorithm_test.rb
|
113
|
+
- test/neural_network/backpropagation_test.rb
|
114
|
+
- test/neural_network/hopfield_test.rb
|
115
|
+
- test/som/som_test.rb
|
116
|
+
rdoc_options: []
|
117
|
+
|
118
|
+
extra_rdoc_files:
|
119
|
+
- README.rdoc
|
120
|
+
executables: []
|
121
|
+
|
122
|
+
extensions: []
|
123
|
+
|
124
|
+
requirements: []
|
125
|
+
|
126
|
+
dependencies: []
|
127
|
+
|
@@ -1,47 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/google_search'
|
2
|
-
require File.dirname(__FILE__) + '/build_keywords'
|
3
|
-
require File.dirname(__FILE__) + '/../../lib/ai4r/clusterers/average_linkage'
|
4
|
-
require 'rubygems'
|
5
|
-
require 'hpricot'
|
6
|
-
require 'net/http'
|
7
|
-
require 'benchmark'
|
8
|
-
|
9
|
-
SITES_TO_CLASSIFY = [
|
10
|
-
"www.foxnews.com", "www.usatoday.com", "scm.jadeferret.com",
|
11
|
-
"www.accurev.com", "www.lastminute.com", "subversion.tigris.org",
|
12
|
-
"news.yahoo.com", "news.bbc.co.uk", "www.orbitz.com"
|
13
|
-
]
|
14
|
-
|
15
|
-
# Return array of keywords for the site
|
16
|
-
def get_keywords(site)
|
17
|
-
response = Net::HTTP.get_response(site, "/")
|
18
|
-
Hpricot(response.body).
|
19
|
-
search("meta[@name='keywords']")[0]. #Select meta keywords element
|
20
|
-
attributes["content"]. #Select its content
|
21
|
-
split(","). #Keywords are coma separated
|
22
|
-
collect{ |k| k.strip.downcase } #Remove start and end white spaces
|
23
|
-
end
|
24
|
-
|
25
|
-
# Get keywords data for each website
|
26
|
-
Site = Struct.new("Site", :name, :keywords)
|
27
|
-
sites = SITES_TO_CLASSIFY.collect do |site_name|
|
28
|
-
Site.new(site_name, get_keywords(site_name))
|
29
|
-
end
|
30
|
-
data_set = Ai4r::Data::DataSet.new(:data_items => sites,
|
31
|
-
:data_labels => Site.members)
|
32
|
-
|
33
|
-
# The distance between sites depends on the keywords collected from internet
|
34
|
-
keywords_distance_function = lambda do |x,y|
|
35
|
-
return Ai4r::Data::Proximity.simple_matching(x.keyword, y.keywords)
|
36
|
-
end
|
37
|
-
|
38
|
-
# Create the clusters
|
39
|
-
clusterer = Ai4r::Clusterers::AverageLinkage.new
|
40
|
-
clusterer.distance_function = keywords_distance_function
|
41
|
-
clusterer.build(data_set, 3)
|
42
|
-
|
43
|
-
# Print results
|
44
|
-
clusterer.clusters.each do |cluster|
|
45
|
-
puts cluster.data_items.collect {|item| item.name}.join(", ")
|
46
|
-
puts "============"
|
47
|
-
end
|