nirvdrum-ai4r 1.9.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +1 -0
- data/.rakeTasks +7 -0
- data/README.rdoc +56 -0
- data/Rakefile.rb +42 -0
- data/VERSION +1 -0
- data/ai4r.gemspec +221 -0
- data/change_log +49 -0
- data/examples/classifiers/id3_data.csv +121 -0
- data/examples/classifiers/id3_example.rb +29 -0
- data/examples/classifiers/naive_bayes_data.csv +11 -0
- data/examples/classifiers/naive_bayes_example.rb +16 -0
- data/examples/classifiers/results.txt +31 -0
- data/examples/genetic_algorithm/genetic_algorithm_example.rb +37 -0
- data/examples/genetic_algorithm/travel_cost.csv +16 -0
- data/examples/neural_network/backpropagation_example.rb +67 -0
- data/examples/neural_network/patterns_with_base_noise.rb +68 -0
- data/examples/neural_network/patterns_with_noise.rb +66 -0
- data/examples/neural_network/training_patterns.rb +68 -0
- data/examples/neural_network/xor_example.rb +35 -0
- data/examples/som/som_data.rb +156 -0
- data/examples/som/som_multi_node_example.rb +22 -0
- data/examples/som/som_single_example.rb +24 -0
- data/lib/ai4r.rb +32 -0
- data/lib/ai4r/classifiers/classifier.rb +59 -0
- data/lib/ai4r/classifiers/hyperpipes.rb +118 -0
- data/lib/ai4r/classifiers/id3.rb +326 -0
- data/lib/ai4r/classifiers/multilayer_perceptron.rb +135 -0
- data/lib/ai4r/classifiers/naive_bayes.rb +259 -0
- data/lib/ai4r/classifiers/one_r.rb +110 -0
- data/lib/ai4r/classifiers/prism.rb +197 -0
- data/lib/ai4r/classifiers/zero_r.rb +73 -0
- data/lib/ai4r/clusterers/average_linkage.rb +59 -0
- data/lib/ai4r/clusterers/bisecting_k_means.rb +93 -0
- data/lib/ai4r/clusterers/centroid_linkage.rb +66 -0
- data/lib/ai4r/clusterers/clusterer.rb +61 -0
- data/lib/ai4r/clusterers/complete_linkage.rb +67 -0
- data/lib/ai4r/clusterers/diana.rb +139 -0
- data/lib/ai4r/clusterers/k_means.rb +126 -0
- data/lib/ai4r/clusterers/median_linkage.rb +61 -0
- data/lib/ai4r/clusterers/single_linkage.rb +194 -0
- data/lib/ai4r/clusterers/ward_linkage.rb +64 -0
- data/lib/ai4r/clusterers/weighted_average_linkage.rb +61 -0
- data/lib/ai4r/data/data_set.rb +266 -0
- data/lib/ai4r/data/parameterizable.rb +64 -0
- data/lib/ai4r/data/proximity.rb +100 -0
- data/lib/ai4r/data/statistics.rb +77 -0
- data/lib/ai4r/experiment/classifier_evaluator.rb +95 -0
- data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +270 -0
- data/lib/ai4r/neural_network/backpropagation.rb +293 -0
- data/lib/ai4r/neural_network/hopfield.rb +149 -0
- data/lib/ai4r/som/layer.rb +68 -0
- data/lib/ai4r/som/node.rb +96 -0
- data/lib/ai4r/som/som.rb +155 -0
- data/lib/ai4r/som/two_phase_layer.rb +90 -0
- data/site/forrest.properties +152 -0
- data/site/forrest.properties.dispatcher.properties +25 -0
- data/site/forrest.properties.xml +29 -0
- data/site/src/documentation/README.txt +7 -0
- data/site/src/documentation/classes/CatalogManager.properties +62 -0
- data/site/src/documentation/content/locationmap.xml +72 -0
- data/site/src/documentation/content/xdocs/downloads.html +9 -0
- data/site/src/documentation/content/xdocs/geneticAlgorithms.xml +294 -0
- data/site/src/documentation/content/xdocs/index.xml +155 -0
- data/site/src/documentation/content/xdocs/machineLearning.xml +131 -0
- data/site/src/documentation/content/xdocs/neuralNetworks.xml +270 -0
- data/site/src/documentation/content/xdocs/site.xml +54 -0
- data/site/src/documentation/content/xdocs/sourceCode.xml +43 -0
- data/site/src/documentation/content/xdocs/tabs.xml +35 -0
- data/site/src/documentation/resources/images/ai4r-logo.png +0 -0
- data/site/src/documentation/resources/images/c.png +0 -0
- data/site/src/documentation/resources/images/c_wbn.png +0 -0
- data/site/src/documentation/resources/images/c_wn.png +0 -0
- data/site/src/documentation/resources/images/ellipse-2.svg +30 -0
- data/site/src/documentation/resources/images/ero.gif +0 -0
- data/site/src/documentation/resources/images/europe2.png +0 -0
- data/site/src/documentation/resources/images/europe3.png +0 -0
- data/site/src/documentation/resources/images/fitness.png +0 -0
- data/site/src/documentation/resources/images/genetic_algorithms_example.png +0 -0
- data/site/src/documentation/resources/images/icon-a.png +0 -0
- data/site/src/documentation/resources/images/icon-b.png +0 -0
- data/site/src/documentation/resources/images/icon.png +0 -0
- data/site/src/documentation/resources/images/jadeferret.png +0 -0
- data/site/src/documentation/resources/images/my_email.png +0 -0
- data/site/src/documentation/resources/images/neural_network_example.png +0 -0
- data/site/src/documentation/resources/images/project-logo.png +0 -0
- data/site/src/documentation/resources/images/rubyforge.png +0 -0
- data/site/src/documentation/resources/images/s.png +0 -0
- data/site/src/documentation/resources/images/s_wbn.png +0 -0
- data/site/src/documentation/resources/images/s_wn.png +0 -0
- data/site/src/documentation/resources/images/sigmoid.png +0 -0
- data/site/src/documentation/resources/images/sub-dir/icon-c.png +0 -0
- data/site/src/documentation/resources/images/t.png +0 -0
- data/site/src/documentation/resources/images/t_wbn.png +0 -0
- data/site/src/documentation/resources/images/t_wn.png +0 -0
- data/site/src/documentation/resources/schema/catalog.xcat +29 -0
- data/site/src/documentation/resources/schema/hello-v10.dtd +51 -0
- data/site/src/documentation/resources/schema/symbols-project-v10.ent +26 -0
- data/site/src/documentation/resources/stylesheets/hello2document.xsl +33 -0
- data/site/src/documentation/sitemap.xmap +66 -0
- data/site/src/documentation/skinconf.xml +418 -0
- data/site/src/documentation/translations/langcode.xml +29 -0
- data/site/src/documentation/translations/languages_de.xml +24 -0
- data/site/src/documentation/translations/languages_en.xml +24 -0
- data/site/src/documentation/translations/languages_es.xml +22 -0
- data/site/src/documentation/translations/languages_fr.xml +24 -0
- data/site/src/documentation/translations/languages_nl.xml +24 -0
- data/site/src/documentation/translations/menu.xml +33 -0
- data/site/src/documentation/translations/menu_af.xml +33 -0
- data/site/src/documentation/translations/menu_de.xml +33 -0
- data/site/src/documentation/translations/menu_es.xml +33 -0
- data/site/src/documentation/translations/menu_fr.xml +33 -0
- data/site/src/documentation/translations/menu_it.xml +33 -0
- data/site/src/documentation/translations/menu_nl.xml +33 -0
- data/site/src/documentation/translations/menu_no.xml +33 -0
- data/site/src/documentation/translations/menu_ru.xml +33 -0
- data/site/src/documentation/translations/menu_sk.xml +33 -0
- data/site/src/documentation/translations/tabs.xml +22 -0
- data/site/src/documentation/translations/tabs_de.xml +22 -0
- data/site/src/documentation/translations/tabs_es.xml +22 -0
- data/site/src/documentation/translations/tabs_fr.xml +22 -0
- data/site/src/documentation/translations/tabs_nl.xml +22 -0
- data/test/classifiers/hyperpipes_test.rb +84 -0
- data/test/classifiers/id3_test.rb +208 -0
- data/test/classifiers/multilayer_perceptron_test.rb +79 -0
- data/test/classifiers/naive_bayes_test.rb +43 -0
- data/test/classifiers/one_r_test.rb +62 -0
- data/test/classifiers/prism_test.rb +85 -0
- data/test/classifiers/zero_r_test.rb +50 -0
- data/test/clusterers/average_linkage_test.rb +51 -0
- data/test/clusterers/bisecting_k_means_test.rb +66 -0
- data/test/clusterers/centroid_linkage_test.rb +53 -0
- data/test/clusterers/complete_linkage_test.rb +57 -0
- data/test/clusterers/diana_test.rb +69 -0
- data/test/clusterers/k_means_test.rb +100 -0
- data/test/clusterers/median_linkage_test.rb +53 -0
- data/test/clusterers/single_linkage_test.rb +122 -0
- data/test/clusterers/ward_linkage_test.rb +53 -0
- data/test/clusterers/weighted_average_linkage_test.rb +53 -0
- data/test/data/data_set.csv +121 -0
- data/test/data/data_set_test.rb +96 -0
- data/test/data/proximity_test.rb +81 -0
- data/test/data/statistics_data_set.csv +5 -0
- data/test/data/statistics_test.rb +65 -0
- data/test/experiment/classifier_evaluator_test.rb +76 -0
- data/test/genetic_algorithm/chromosome_test.rb +58 -0
- data/test/genetic_algorithm/genetic_algorithm_test.rb +81 -0
- data/test/neural_network/backpropagation_test.rb +69 -0
- data/test/neural_network/hopfield_test.rb +72 -0
- data/test/som/som_test.rb +97 -0
- metadata +238 -0
@@ -0,0 +1,64 @@
|
|
1
|
+
# Author:: Sergio Fierens
|
2
|
+
# License:: MPL 1.1
|
3
|
+
# Project:: ai4r
|
4
|
+
# Url:: http://ai4r.rubyforge.org/
|
5
|
+
#
|
6
|
+
# You can redistribute it and/or modify it under the terms of
|
7
|
+
# the Mozilla Public License version 1.1 as published by the
|
8
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
+
|
10
|
+
module Ai4r
|
11
|
+
module Data
|
12
|
+
module Parameterizable

  # Class-level helpers. They are added to every class that includes
  # Parameterizable (see Parameterizable.included).
  module ClassMethods

    # Describe what can be parameterized on this algorithm.
    # Returns the hash registered through parameters_info, or an empty
    # hash when nothing has been registered on this class:
    #   { :param_name => "Info on the parameter" }
    def get_parameters_info
      @_params_info_ || {}
    end

    # Register what can be parameterized on this algorithm, and create an
    # attribute accessor for every parameter name.
    # You must provide a hash with the following format:
    #   { :param_name => "Info on the parameter" }
    def parameters_info(params_info)
      @_params_info_ = params_info
      params_info.keys.each { |name| attr_accessor name }
    end
  end

  # Ruby hook: when the module is included in a class, extend that class
  # with the ClassMethods above.
  def self.included(base)
    base.extend(ClassMethods)
  end

  # Set parameter values on this algorithm instance.
  # You must provide a hash with the following format:
  #   { :param_name => parameter_value }
  # Only registered parameters that have a writer and are present in
  # +params+ are assigned; everything else is ignored.
  # Returns self, so calls can be chained.
  def set_parameters(params)
    self.class.get_parameters_info.keys.each do |name|
      writer = "#{name}=".to_sym
      next unless respond_to?(writer) && params.has_key?(name)
      send(writer, params[name])
    end
    self
  end

  # Get parameter values on this algorithm instance.
  # Returns a hash with the following format:
  #   { :param_name => parameter_value }
  def get_parameters
    self.class.get_parameters_info.keys.inject({}) do |values, name|
      values[name] = send(name) if respond_to?(name)
      values
    end
  end

end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
@@ -0,0 +1,100 @@
|
|
1
|
+
# Author:: Sergio Fierens
|
2
|
+
# License:: MPL 1.1
|
3
|
+
# Project:: ai4r
|
4
|
+
# Url:: http://ai4r.rubyforge.org/
|
5
|
+
#
|
6
|
+
# You can redistribute it and/or modify it under the terms of
|
7
|
+
# the Mozilla Public License version 1.1 as published by the
|
8
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
+
|
10
|
+
module Ai4r
|
11
|
+
module Data
|
12
|
+
|
13
|
+
# This module provides classical distance functions
|
14
|
+
module Proximity

  # Squared Euclidean distance: a cheaper replacement for the Euclidean
  # distance when only the ordering of distances matters (no square root).
  # Parameters a and b are vectors with continuous attributes.
  def self.squared_euclidean_distance(a, b)
    a.each_with_index.inject(0.0) do |total, (item_a, i)|
      total + (item_a - b[i])**2
    end
  end

  # Euclidean distance, or L2 norm.
  # Parameters a and b are vectors with continuous attributes.
  # Euclidean distance tends to form hyperspherical
  # clusters (Clustering, Xu and Wunsch, 2009).
  # Translations and rotations do not cause a
  # distortion in distance relation (Duda et al, 2001).
  # If attributes are measured with different units,
  # attributes with larger values and variance will
  # dominate the metric.
  def self.euclidean_distance(a, b)
    Math.sqrt(squared_euclidean_distance(a, b))
  end

  # City block, Manhattan distance, or L1 norm.
  # Parameters a and b are vectors with continuous attributes.
  def self.manhattan_distance(a, b)
    a.each_with_index.inject(0.0) do |total, (item_a, i)|
      total + (item_a - b[i]).abs
    end
  end

  # Sup distance, or L-infinity norm: the largest absolute difference
  # between corresponding attributes.
  # Parameters a and b are vectors with continuous attributes.
  def self.sup_distance(a, b)
    a.each_with_index.inject(0.0) do |widest, (item_a, i)|
      gap = (item_a - b[i]).abs
      gap > widest ? gap : widest
    end
  end

  # The Hamming distance between two attribute vectors of equal
  # length is the number of positions at which the corresponding
  # values are different.
  # This distance function is frequently used with binary attributes,
  # though it can be used with other discrete attributes.
  def self.hamming_distance(a,b)
    a.each_index.count { |i| a[i] != b[i] }
  end

  # The "Simple matching" distance between two attribute sets is derived
  # from the number of values present on both vectors.
  # If sets a and b have lengths da and db then:
  #
  #  S = 2/(da + db) * Number of values present on both sets
  #  D = 1.0/S - 1
  #
  # Some considerations:
  # * a and b must not include repeated items
  # * all attributes are treated equally
  def self.simple_matching_distance(a,b)
    shared = a.count { |item| b.include?(item) }
    similarity = (2.0 * shared) / (a.length + b.length)
    1.0 / similarity - 1
  end

end
|
96
|
+
|
97
|
+
end
|
98
|
+
|
99
|
+
end
|
100
|
+
|
@@ -0,0 +1,77 @@
|
|
1
|
+
# Author:: Sergio Fierens
|
2
|
+
# License:: MPL 1.1
|
3
|
+
# Project:: ai4r
|
4
|
+
# Url:: http://ai4r.rubyforge.org/
|
5
|
+
#
|
6
|
+
# You can redistribute it and/or modify it under the terms of
|
7
|
+
# the Mozilla Public License version 1.1 as published by the
|
8
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
+
|
10
|
+
require File.dirname(__FILE__) + '/data_set'
|
11
|
+
|
12
|
+
module Ai4r
|
13
|
+
module Data
|
14
|
+
|
15
|
+
# This module provides some basic statistics functions to operate on
|
16
|
+
# data set attributes.
|
17
|
+
module Statistics

  # Get the sample mean of +attribute+ over all items of +data_set+.
  # +data_set+ must respond to get_index(attribute) and data_items.
  # The result is a Float (NaN when the data set has no items).
  def self.mean(data_set, attribute)
    index = data_set.get_index(attribute)
    sum = 0.0
    data_set.data_items.each { |item| sum += item[index] }
    return sum / data_set.data_items.length
  end

  # Get the sample variance (divides by n-1).
  # You can provide the mean if you have it already, to speed up things;
  # it is only computed here when it is not given.
  def self.variance(data_set, attribute, mean = nil)
    index = data_set.get_index(attribute)
    # The parentheses make the right-hand side a call to Statistics.mean,
    # even though a local variable of the same name exists.
    mean ||= mean(data_set, attribute)
    sum = 0.0
    data_set.data_items.each { |item| sum += (item[index]-mean)**2 }
    return sum / (data_set.data_items.length-1)
  end

  # Get the standard deviation.
  # You can provide the variance if you have it already, to speed up things.
  def self.standard_deviation(data_set, attribute, variance = nil)
    variance ||= variance(data_set, attribute)
    Math.sqrt(variance)
  end

  # Get the sample mode (most frequent value) of +attribute+.
  # On ties the value that first reaches the highest count wins.
  # Returns nil when the data set has no items.
  def self.mode(data_set, attribute)
    index = data_set.get_index(attribute)
    count = Hash.new(0)
    max_count = 0
    mode = nil
    data_set.data_items.each do |data_item|
      attr_value = data_item[index]
      attr_count = (count[attr_value] += 1)
      if attr_count > max_count
        mode = attr_value
        max_count = attr_count
      end
    end
    return mode
  end

  # Get the maximum value of an attribute in the data set.
  # Returns -Infinity when the data set has no items.
  def self.max(data_set, attribute)
    index = data_set.get_index(attribute)
    item = data_set.data_items.max {|x,y| x[index] <=> y[index]}
    return (item) ? item[index] : (-1.0/0)
  end

  # Get the minimum value of an attribute in the data set.
  # Returns Infinity when the data set has no items.
  def self.min(data_set, attribute)
    index = data_set.get_index(attribute)
    item = data_set.data_items.min {|x,y| x[index] <=> y[index]}
    return (item) ? item[index] : (1.0/0)
  end

end
|
76
|
+
end
|
77
|
+
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
require 'benchmark'
|
2
|
+
require File.dirname(__FILE__) + '/../data/data_set'
|
3
|
+
|
4
|
+
|
5
|
+
module Ai4r
|
6
|
+
|
7
|
+
module Experiment
|
8
|
+
|
9
|
+
# The ClassifierEvaluator is useful to compare different classifiers
|
10
|
+
# algorithms. The evaluator builds the Classifiers using the same data
|
11
|
+
# examples, and provides methods to evaluate their performance in parallel.
|
12
|
+
# It is a nice tool to compare and evaluate the performance of different
|
13
|
+
# algorithms, the same algorithm with different parameters, or your own new
|
14
|
+
# algorithm against the classic classifiers.
|
15
|
+
class ClassifierEvaluator

  # build_times / eval_times hold one Benchmark measurement per classifier,
  # in the order the classifiers were added. They are nil until build /
  # eval has been called.
  attr_reader :build_times, :eval_times, :classifiers

  def initialize
    @classifiers = []
  end

  # Add a classifier instance to the test batch.
  # Returns self, so additions can be chained (also available as <<).
  def add_classifier(classifier)
    @classifiers.push(classifier)
    self
  end

  alias :<< :add_classifier

  # Build all classifiers, using data examples found in data_set.
  # The last attribute of each item is considered as the
  # item class.
  # Building times are measured separately for each classifier, and can
  # be accessed through the build_times attribute reader.
  def build(data_set)
    @build_times = []
    @classifiers.each do |classifier|
      timing = Benchmark.measure { classifier.build data_set }
      @build_times.push(timing)
    end
    self
  end

  # You can evaluate new data, predicting its class.
  # e.g.
  #   classifier.eval(['New York',  '<30', 'F'])
  #     => ['Y', 'Y', 'Y', 'N', 'Y', 'Y', 'N']
  # The result has one prediction per classifier.
  # Evaluation times are measured separately for each classifier, and can
  # be accessed through the eval_times attribute reader.
  def eval(data)
    @eval_times = []
    predictions = []
    @classifiers.each do |classifier|
      timing = Benchmark.measure { predictions.push(classifier.eval(data)) }
      @eval_times.push(timing)
    end
    predictions
  end

  # Test classifiers using a data set. The last attribute of each item
  # is considered as the expected class. Data items are evaluated
  # using all classifiers: evaluation times, success rate, and quantity of
  # classification errors are returned in a data set.
  # The return data set has a row for every classifier tested, and the
  # following attributes:
  #   ["Classifier", "Testing Time", "Errors", "Success rate"]
  def test(data_set)
    rows = @classifiers.map { |classifier| test_classifier(classifier, data_set) }
    Ai4r::Data::DataSet.new(
      :data_items => rows,
      :data_labels => ["Classifier","Testing Time","Errors","Success rate"])
  end

  private

  # Runs one classifier over every item of data_set and builds its result
  # row: [classifier, elapsed real time, error count, success rate].
  def test_classifier(classifier, data_set)
    data_set_size = data_set.data_items.length
    errors = 0
    testing_times = Benchmark.measure do
      errors = data_set.data_items.count do |data_item|
        # Everything but the last attribute is input; the last one is the
        # expected class.
        classifier.eval(data_item[0...-1]) != data_item.last
      end
    end
    success_rate = (data_set_size - errors * 1.0) / data_set_size
    [classifier, testing_times.real, errors, success_rate]
  end

end
|
92
|
+
|
93
|
+
end
|
94
|
+
|
95
|
+
end
|
@@ -0,0 +1,270 @@
|
|
1
|
+
# Author:: Sergio Fierens
|
2
|
+
# License:: MPL 1.1
|
3
|
+
# Project:: ai4r
|
4
|
+
# Url:: http://ai4r.rubyforge.org/
|
5
|
+
#
|
6
|
+
# You can redistribute it and/or modify it under the terms of
|
7
|
+
# the Mozilla Public License version 1.1 as published by the
|
8
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
+
module Ai4r
|
10
|
+
|
11
|
+
# The GeneticAlgorithm module implements the GeneticSearch and Chromosome
|
12
|
+
# classes. The GeneticSearch is a generic class, and can be used to solve
|
13
|
+
# any kind of problems. The GeneticSearch class performs a stochastic search
|
14
|
+
# of the solution of a given problem.
|
15
|
+
#
|
16
|
+
# The Chromosome is "problem specific". Ai4r built-in Chromosome class was
|
17
|
+
# designed to model the Travelling salesman problem. If you want to solve other
|
18
|
+
# type of problem, you will have to modify the Chromosome class, by overwriting
|
19
|
+
# its fitness, reproduce, and mutate functions, to model your specific problem.
|
20
|
+
module GeneticAlgorithm
|
21
|
+
|
22
|
+
# This class is used to automatically:
|
23
|
+
#
|
24
|
+
# 1. Choose initial population
|
25
|
+
# 2. Evaluate the fitness of each individual in the population
|
26
|
+
# 3. Repeat
|
27
|
+
# 1. Select best-ranking individuals to reproduce
|
28
|
+
# 2. Breed new generation through crossover and mutation (genetic operations) and give birth to offspring
|
29
|
+
# 3. Evaluate the individual fitnesses of the offspring
|
30
|
+
# 4. Replace worst ranked part of population with offspring
|
31
|
+
# 4. Until termination
|
32
|
+
#
|
33
|
+
# If you want to customize the algorithm, you must modify any of the following classes:
|
34
|
+
# - Chromosome
|
35
|
+
# - Population
|
36
|
+
class GeneticSearch

  attr_accessor :population

  # initial_population_size:: how many chromosomes are seeded by
  #                           generate_initial_population
  # generations::             how many selection / reproduction /
  #                           replacement rounds run performs
  def initialize(initial_population_size, generations)
    @population_size = initial_population_size
    @max_generation = generations
    @generation = 0
  end

  # 1. Choose initial population
  # 2. Evaluate the fitness of each individual in the population
  # 3. Repeat
  #    1. Select best-ranking individuals to reproduce
  #    2. Breed new generation through crossover and mutation (genetic operations) and give birth to offspring
  #    3. Evaluate the individual fitnesses of the offspring
  #    4. Replace worst ranked part of population with offspring
  # 4. Until termination
  # 5. Return the best chromosome
  def run
    generate_initial_population                    #Generate initial population
    @max_generation.times do
      selected_to_breed = selection                #Evaluates current population
      offsprings = reproduction selected_to_breed  #Generate the population for this new generation
      replace_worst_ranked offsprings
    end
    return best_chromosome
  end

  # Fill the population with @population_size chromosomes obtained from
  # Chromosome.seed.
  def generate_initial_population
    @population = []
    @population_size.times do
      population << Chromosome.seed
    end
  end

  # Select best-ranking individuals to reproduce
  #
  # Selection is the stage of a genetic algorithm in which individual
  # genomes are chosen from a population for later breeding.
  # There are several generic selection algorithms, such as
  # tournament selection and roulette wheel selection. We implemented the
  # latter.
  #
  # Steps:
  #
  # 1. The fitness function is evaluated for each individual, providing fitness values
  # 2. The population is sorted by descending fitness values.
  # 3. The fitness values are then normalized. (Highest fitness gets 1, lowest fitness gets 0). The normalized value is stored in the "normalized_fitness" attribute of the chromosomes. When every chromosome has the same fitness, all of them get 1.
  # 4. A random number R is chosen. R is between 0 and the accumulated normalized value (all the normalized fitness values added together).
  # 5. The selected individual is the first one whose accumulated normalized value (its normalized value plus the normalized values of the chromosomes prior to it) is greater than or equal to R.
  # 6. We repeat steps 4 and 5, 2/3 times the population size.
  def selection
    @population.sort! { |a, b| b.fitness <=> a.fitness}
    best_fitness = @population[0].fitness
    worst_fitness = @population.last.fitness
    spread = best_fitness - worst_fitness
    acum_fitness = 0
    if spread > 0
      @population.each do |chromosome|
        # to_f prevents integer division from collapsing every normalized
        # fitness to 0 or 1 when the fitness values are Integers.
        chromosome.normalized_fitness = (chromosome.fitness - worst_fitness).to_f / spread
        acum_fitness += chromosome.normalized_fitness
      end
    else
      @population.each { |chromosome| chromosome.normalized_fitness = 1}
      # Every chromosome weighs 1, so the roulette total is the population
      # size; leaving it at 0 would always select the first chromosome.
      acum_fitness = @population.length
    end
    selected_to_breed = []
    ((2*@population_size)/3).times do
      selected_to_breed << select_random_individual(acum_fitness)
    end
    selected_to_breed
  end

  # We combine each pair of selected chromosome using the method
  # Chromosome.reproduce
  #
  # The reproduction will also call the Chromosome.mutate method with
  # each member of the population. You should implement Chromosome.mutate
  # to only change (mutate) randomly. E.g. You could effectively change the
  # chromosome only if
  #     rand < ((1 - chromosome.normalized_fitness) * 0.4)
  def reproduction(selected_to_breed)
    offsprings = []
    0.upto(selected_to_breed.length/2-1) do |i|
      offsprings << Chromosome.reproduce(selected_to_breed[2*i], selected_to_breed[2*i+1])
    end
    @population.each do |individual|
      Chromosome.mutate(individual)
    end
    return offsprings
  end

  # Replace worst ranked part of population with offspring.
  # Relies on the population being sorted by descending fitness (as left
  # by selection): the last offsprings.length members are dropped.
  def replace_worst_ranked(offsprings)
    size = offsprings.length
    @population = @population[0..((-1*size)-1)] + offsprings
  end

  # Select the best chromosome in the population
  def best_chromosome
    the_best = @population[0]
    @population.each do |chromosome|
      the_best = chromosome if chromosome.fitness > the_best.fitness
    end
    return the_best
  end

  private

  # Roulette wheel draw: pick a random point in [0, acum_fitness) and
  # return the first chromosome whose running total of normalized fitness
  # reaches it.
  def select_random_individual(acum_fitness)
    select_random_target = acum_fitness * rand
    local_acum = 0
    @population.each do |chromosome|
      local_acum += chromosome.normalized_fitness
      return chromosome if local_acum >= select_random_target
    end
    # Floating point rounding can leave the running total marginally below
    # the target; fall back to the last chromosome instead of leaking the
    # population array returned by each.
    @population.last
  end

end
|
155
|
+
|
156
|
+
# A Chromosome is a representation of an individual solution for a specific
|
157
|
+
# problem. You will have to redefine the Chromosome representation for each
|
158
|
+
# particular problem, along with its fitness, mutate, reproduce, and seed
|
159
|
+
# methods.
|
160
|
+
class Chromosome

  # data holds the solution itself: here, the list of node indexes in
  # visiting order.
  attr_reader :data
  attr_accessor :normalized_fitness

  def initialize(data)
    @data = data
  end

  # Replace the solution data. The cached fitness is dropped, because it
  # was computed for the previous data.
  def data=(data)
    @data = data
    @fitness = nil
  end

  # The fitness method quantifies the optimality of a solution
  # (that is, a chromosome) in a genetic algorithm so that that particular
  # chromosome may be ranked against all the other chromosomes.
  #
  # Optimal chromosomes, or at least chromosomes which are more optimal,
  # are allowed to breed and mix their datasets by any of several techniques,
  # producing a new generation that will (hopefully) be even better.
  #
  # In this implementation the fitness is the negated sum of the costs
  # (from the matrix given to set_cost_matrix) between consecutive nodes
  # of data. The value is cached until data is assigned again.
  def fitness
    return @fitness if @fitness
    last_token = @data[0]
    cost = 0
    @data[1..-1].each do |token|
      cost += @@costs[last_token][token]
      last_token = token
    end
    @fitness = -1 * cost
    return @fitness
  end

  # mutation method is used to maintain genetic diversity from one
  # generation of a population of chromosomes to the next. It is analogous
  # to biological mutation.
  #
  # The purpose of mutation in GAs is to allow the
  # algorithm to avoid local minima by preventing the population of
  # chromosomes from becoming too similar to each other, thus slowing or even
  # stopping evolution.
  #
  # Calling the mutate function will "probably" slightly change a chromosome
  # randomly.
  #
  # This implementation of "mutation" will (probably) reverse the
  # order of 2 consecutive random nodes
  # (e.g. from [ 0, 1, 2, 4] to [0, 2, 1, 4]) if:
  #     rand < ((1 - chromosome.normalized_fitness) * 0.3)
  #
  # Chromosomes without a normalized_fitness, or with fewer than two
  # nodes, are left untouched.
  def self.mutate(chromosome)
    if chromosome.normalized_fitness && rand < ((1 - chromosome.normalized_fitness) * 0.3)
      data = chromosome.data
      # rand(0) returns a Float, which would make the swap below inject a
      # nil into a one-node chromosome.
      return if data.length < 2
      index = rand(data.length-1)
      data[index], data[index+1] = data[index+1], data[index]
      # Going through the writer also invalidates the cached fitness of
      # this chromosome (self is the class here, so assigning @fitness
      # directly would not touch the instance).
      chromosome.data = data
    end
  end

  # Reproduction method is used to combine two chromosomes (solutions) into
  # a single new chromosome. There are several ways to
  # combine two chromosomes: One-point crossover, Two-point crossover,
  # "Cut and splice", edge recombination, and more.
  #
  # The method is usually dependent on the problem domain.
  # In this case, we have implemented edge recombination, which is the
  # most used reproduction algorithm for the Travelling salesman problem.
  #
  # The child starts at the first node of +a+. At every step it follows
  # the successor of the current node in b, else the successor in a, else
  # a random unvisited node; a and b swap roles with probability 0.4.
  def self.reproduce(a, b)
    data_size = @@costs[0].length
    available = (0...data_size).to_a
    token = a.data[0]
    spawn = [token]
    available.delete(token)
    while available.length > 0 do
      #Select next
      if token != b.data.last && available.include?(b.data[b.data.index(token)+1])
        next_token = b.data[b.data.index(token)+1]
      elsif token != a.data.last && available.include?(a.data[a.data.index(token)+1])
        next_token = a.data[a.data.index(token)+1]
      else
        next_token = available[rand(available.length)]
      end
      #Add to spawn
      token = next_token
      available.delete(token)
      spawn << next_token
      a, b = b, a if rand < 0.4
    end
    return Chromosome.new(spawn)
  end

  # Initializes an individual solution (chromosome) for the initial
  # population. Usually the chromosome is generated randomly, but you can
  # use some problem domain knowledge, to generate a
  # (probably) better initial solution.
  #
  # Here: a random permutation of the node indexes of the cost matrix.
  def self.seed
    data_size = @@costs[0].length
    available = (0...data_size).to_a
    seed = []
    while available.length > 0 do
      index = rand(available.length)
      seed << available.delete_at(index)
    end
    return Chromosome.new(seed)
  end

  # Set the cost matrix shared by all chromosomes: costs[i][j] is the
  # cost of going from node i to node j.
  def self.set_cost_matrix(costs)
    @@costs = costs
  end
end
|
267
|
+
|
268
|
+
end
|
269
|
+
|
270
|
+
end
|