ai4ruby 1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +47 -0
- data/examples/classifiers/id3_data.csv +121 -0
- data/examples/classifiers/id3_example.rb +29 -0
- data/examples/classifiers/naive_bayes_data.csv +11 -0
- data/examples/classifiers/naive_bayes_example.rb +16 -0
- data/examples/classifiers/results.txt +31 -0
- data/examples/genetic_algorithm/genetic_algorithm_example.rb +37 -0
- data/examples/genetic_algorithm/travel_cost.csv +16 -0
- data/examples/neural_network/backpropagation_example.rb +67 -0
- data/examples/neural_network/patterns_with_base_noise.rb +68 -0
- data/examples/neural_network/patterns_with_noise.rb +66 -0
- data/examples/neural_network/training_patterns.rb +68 -0
- data/examples/neural_network/xor_example.rb +35 -0
- data/examples/som/som_data.rb +156 -0
- data/examples/som/som_multi_node_example.rb +22 -0
- data/examples/som/som_single_example.rb +24 -0
- data/lib/ai4r.rb +33 -0
- data/lib/ai4r/classifiers/classifier.rb +62 -0
- data/lib/ai4r/classifiers/hyperpipes.rb +118 -0
- data/lib/ai4r/classifiers/ib1.rb +121 -0
- data/lib/ai4r/classifiers/id3.rb +326 -0
- data/lib/ai4r/classifiers/multilayer_perceptron.rb +135 -0
- data/lib/ai4r/classifiers/naive_bayes.rb +259 -0
- data/lib/ai4r/classifiers/one_r.rb +110 -0
- data/lib/ai4r/classifiers/prism.rb +197 -0
- data/lib/ai4r/classifiers/zero_r.rb +73 -0
- data/lib/ai4r/clusterers/average_linkage.rb +59 -0
- data/lib/ai4r/clusterers/bisecting_k_means.rb +93 -0
- data/lib/ai4r/clusterers/centroid_linkage.rb +66 -0
- data/lib/ai4r/clusterers/clusterer.rb +61 -0
- data/lib/ai4r/clusterers/complete_linkage.rb +67 -0
- data/lib/ai4r/clusterers/diana.rb +139 -0
- data/lib/ai4r/clusterers/k_means.rb +126 -0
- data/lib/ai4r/clusterers/median_linkage.rb +61 -0
- data/lib/ai4r/clusterers/single_linkage.rb +194 -0
- data/lib/ai4r/clusterers/ward_linkage.rb +64 -0
- data/lib/ai4r/clusterers/ward_linkage_hierarchical.rb +31 -0
- data/lib/ai4r/clusterers/weighted_average_linkage.rb +61 -0
- data/lib/ai4r/data/data_set.rb +266 -0
- data/lib/ai4r/data/parameterizable.rb +64 -0
- data/lib/ai4r/data/proximity.rb +100 -0
- data/lib/ai4r/data/statistics.rb +77 -0
- data/lib/ai4r/experiment/classifier_evaluator.rb +95 -0
- data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +270 -0
- data/lib/ai4r/neural_network/backpropagation.rb +326 -0
- data/lib/ai4r/neural_network/hopfield.rb +149 -0
- data/lib/ai4r/som/layer.rb +68 -0
- data/lib/ai4r/som/node.rb +96 -0
- data/lib/ai4r/som/som.rb +155 -0
- data/lib/ai4r/som/two_phase_layer.rb +90 -0
- data/test/classifiers/hyperpipes_test.rb +84 -0
- data/test/classifiers/ib1_test.rb +78 -0
- data/test/classifiers/id3_test.rb +208 -0
- data/test/classifiers/multilayer_perceptron_test.rb +79 -0
- data/test/classifiers/naive_bayes_test.rb +43 -0
- data/test/classifiers/one_r_test.rb +62 -0
- data/test/classifiers/prism_test.rb +85 -0
- data/test/classifiers/zero_r_test.rb +49 -0
- data/test/clusterers/average_linkage_test.rb +51 -0
- data/test/clusterers/bisecting_k_means_test.rb +66 -0
- data/test/clusterers/centroid_linkage_test.rb +53 -0
- data/test/clusterers/complete_linkage_test.rb +57 -0
- data/test/clusterers/diana_test.rb +69 -0
- data/test/clusterers/k_means_test.rb +100 -0
- data/test/clusterers/median_linkage_test.rb +53 -0
- data/test/clusterers/single_linkage_test.rb +122 -0
- data/test/clusterers/ward_linkage_hierarchical_test.rb +61 -0
- data/test/clusterers/ward_linkage_test.rb +53 -0
- data/test/clusterers/weighted_average_linkage_test.rb +53 -0
- data/test/data/data_set_test.rb +96 -0
- data/test/data/proximity_test.rb +81 -0
- data/test/data/statistics_test.rb +65 -0
- data/test/experiment/classifier_evaluator_test.rb +76 -0
- data/test/genetic_algorithm/chromosome_test.rb +58 -0
- data/test/genetic_algorithm/genetic_algorithm_test.rb +81 -0
- data/test/neural_network/backpropagation_test.rb +82 -0
- data/test/neural_network/hopfield_test.rb +72 -0
- data/test/som/som_test.rb +97 -0
- metadata +168 -0
@@ -0,0 +1,64 @@
|
|
1
|
+
# Author:: Sergio Fierens
|
2
|
+
# License:: MPL 1.1
|
3
|
+
# Project:: ai4r
|
4
|
+
# Url:: http://ai4r.rubyforge.org/
|
5
|
+
#
|
6
|
+
# You can redistribute it and/or modify it under the terms of
|
7
|
+
# the Mozilla Public License version 1.1 as published by the
|
8
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
+
|
10
|
+
module Ai4r
|
11
|
+
module Data
|
12
|
+
module Parameterizable

  # Class-level methods added to every class that includes Parameterizable
  # (see Parameterizable.included).
  module ClassMethods

    # Get info on what can be parameterized on this algorithm.
    # It returns a hash with the following format:
    # { :param_name => "Info on the parameter" }
    #
    # A class that did not declare its own parameters falls back to the
    # declaration of its closest ancestor, so that subclasses of a
    # parameterized algorithm still work with set_parameters and
    # get_parameters. An empty hash is returned when nothing was declared.
    def get_parameters_info
      return @_params_info_ if @_params_info_
      # Modules have no superclass, hence the respond_to? guard.
      if respond_to?(:superclass) && superclass.respond_to?(:get_parameters_info)
        return superclass.get_parameters_info
      end
      {}
    end

    # Set info on what can be parameterized on this algorithm.
    # You must provide a hash with the following format:
    # { :param_name => "Info on the parameter" }
    #
    # An attribute accessor is generated for every key of the hash.
    def parameters_info(params_info)
      @_params_info_ = params_info
      params_info.keys.each { |param| attr_accessor param }
    end

  end

  # Hook called by Ruby when this module is included: makes the
  # ClassMethods available on the including class.
  def self.included(base)
    base.extend(ClassMethods)
  end

  # Set parameter values on this algorithm instance.
  # You must provide a hash with the following format:
  # { :param_name => parameter_value }
  #
  # Only declared parameters are considered; any other key found in
  # params is ignored. Returns self, so calls can be chained.
  def set_parameters(params)
    self.class.get_parameters_info.keys.each do |key|
      writer = "#{key}="
      next unless respond_to?(writer)
      send(writer, params[key]) if params.key?(key)
    end
    self
  end

  # Get parameter values on this algorithm instance.
  # Returns a hash with the following format:
  # { :param_name => parameter_value }
  def get_parameters
    self.class.get_parameters_info.keys.inject({}) do |values, key|
      values[key] = send(key) if respond_to?(key)
      values
    end
  end

end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
@@ -0,0 +1,100 @@
|
|
1
|
+
# Author:: Sergio Fierens
|
2
|
+
# License:: MPL 1.1
|
3
|
+
# Project:: ai4r
|
4
|
+
# Url:: http://ai4r.rubyforge.org/
|
5
|
+
#
|
6
|
+
# You can redistribute it and/or modify it under the terms of
|
7
|
+
# the Mozilla Public License version 1.1 as published by the
|
8
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
+
|
10
|
+
module Ai4r
|
11
|
+
module Data
|
12
|
+
|
13
|
+
# This module provides classical distance functions
|
14
|
+
module Proximity

  # Squared euclidean distance. It is a computationally cheaper
  # replacement for the euclidean distance (no square root is taken).
  # Parameters a and b are vectors with continuous attributes.
  def self.squared_euclidean_distance(a, b)
    a.each_with_index.inject(0.0) do |sum, (item_a, i)|
      sum + (item_a - b[i])**2
    end
  end

  # Euclidean distance, or L2 norm.
  # Parameters a and b are vectors with continuous attributes.
  # Euclidean distance tends to form hyperspherical
  # clusters (Clustering, Xu and Wunsch, 2009).
  # Translations and rotations do not cause a
  # distortion in distance relation (Duda et al, 2001)
  # If attributes are measured with different units,
  # attributes with larger values and variance will
  # dominate the metric.
  def self.euclidean_distance(a, b)
    Math.sqrt(squared_euclidean_distance(a, b))
  end

  # City block, Manhattan distance, or L1 norm.
  # Parameters a and b are vectors with continuous attributes.
  def self.manhattan_distance(a, b)
    a.each_with_index.inject(0.0) do |sum, (item_a, i)|
      sum + (item_a - b[i]).abs
    end
  end

  # Sup distance, or L-infinity norm: the largest absolute difference
  # between corresponding attributes.
  # Parameters a and b are vectors with continuous attributes.
  def self.sup_distance(a, b)
    a.each_with_index.inject(0.0) do |distance, (item_a, i)|
      diff = (item_a - b[i]).abs
      diff > distance ? diff : distance
    end
  end

  # The Hamming distance between two attributes vectors of equal
  # length is the number of attributes for which the corresponding
  # vectors are different
  # This distance function is frequently used with binary attributes,
  # though it can be used with other discrete attributes.
  def self.hamming_distance(a, b)
    a.each_index.inject(0) do |count, i|
      a[i] != b[i] ? count + 1 : count
    end
  end

  # The "Simple matching" distance between two attribute sets is given
  # by the number of values present on both vectors.
  # If sets a and b have lengths da and db then:
  #
  #  S = 2/(da + db) * Number of values present on both sets
  #  D = 1.0/S - 1
  #
  # Some considerations:
  # * a and b must not include repeated items
  # * all attributes are treated equally
  # * two sets without any common value are infinitely distant
  #   (S is 0, so D is Infinity)
  # * two empty sets are considered identical (D is 0.0)
  def self.simple_matching_distance(a, b)
    total_length = a.length + b.length
    # Without this guard 0.0/0 would turn the result into NaN.
    return 0.0 if total_length == 0
    shared = a.inject(0) { |count, item| b.include?(item) ? count + 1 : count }
    similarity = (2.0 * shared) / total_length
    1.0 / similarity - 1
  end

end
|
96
|
+
|
97
|
+
end
|
98
|
+
|
99
|
+
end
|
100
|
+
|
@@ -0,0 +1,77 @@
|
|
1
|
+
# Author:: Sergio Fierens
|
2
|
+
# License:: MPL 1.1
|
3
|
+
# Project:: ai4r
|
4
|
+
# Url:: http://ai4r.rubyforge.org/
|
5
|
+
#
|
6
|
+
# You can redistribute it and/or modify it under the terms of
|
7
|
+
# the Mozilla Public License version 1.1 as published by the
|
8
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
+
|
10
|
+
require File.dirname(__FILE__) + '/data_set'
|
11
|
+
|
12
|
+
module Ai4r
|
13
|
+
module Data
|
14
|
+
|
15
|
+
# This module provides some basic statistics functions to operate on
|
16
|
+
# data set attributes.
|
17
|
+
module Statistics

  # Get the sample mean of an attribute.
  # data_set must respond to get_index(attribute) and data_items.
  def self.mean(data_set, attribute)
    index = data_set.get_index(attribute)
    total = data_set.data_items.inject(0.0) { |sum, item| sum + item[index] }
    total / data_set.data_items.length
  end

  # Get the sample variance (the sum of squared deviations is divided
  # by n - 1).
  # You can provide the mean if you have it already, to speed up things.
  def self.variance(data_set, attribute, mean = nil)
    index = data_set.get_index(attribute)
    # Only compute the mean when the caller did not supply it.
    mean ||= self.mean(data_set, attribute)
    squares = data_set.data_items.inject(0.0) do |sum, item|
      sum + (item[index] - mean)**2
    end
    squares / (data_set.data_items.length - 1)
  end

  # Get the standard deviation.
  # You can provide the variance if you have it already, to speed up things.
  def self.standard_deviation(data_set, attribute, variance = nil)
    variance ||= self.variance(data_set, attribute)
    Math.sqrt(variance)
  end

  # Get the sample mode (the most frequent value of the attribute).
  # On ties, the value that first reaches the highest count wins.
  # Returns nil for a data set without items.
  def self.mode(data_set, attribute)
    index = data_set.get_index(attribute)
    counts = Hash.new(0)
    max_count = 0
    mode = nil
    data_set.data_items.each do |data_item|
      attr_value = data_item[index]
      counts[attr_value] += 1
      if counts[attr_value] > max_count
        mode = attr_value
        max_count = counts[attr_value]
      end
    end
    mode
  end

  # Get the maximum value of an attribute in the data set.
  # Returns -Infinity when the data set has no items.
  def self.max(data_set, attribute)
    index = data_set.get_index(attribute)
    item = data_set.data_items.max { |x, y| x[index] <=> y[index] }
    item ? item[index] : (-1.0 / 0)
  end

  # Get the minimum value of an attribute in the data set.
  # Returns Infinity when the data set has no items.
  def self.min(data_set, attribute)
    index = data_set.get_index(attribute)
    item = data_set.data_items.min { |x, y| x[index] <=> y[index] }
    item ? item[index] : (1.0 / 0)
  end

end
|
76
|
+
end
|
77
|
+
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
require 'benchmark'
|
2
|
+
require File.dirname(__FILE__) + '/../data/data_set'
|
3
|
+
|
4
|
+
|
5
|
+
module Ai4r
|
6
|
+
|
7
|
+
module Experiment
|
8
|
+
|
9
|
+
# The ClassifierEvaluator is useful to compare different classifiers
|
10
|
+
# algorithms. The evaluator builds the Classifiers using the same data
|
11
|
+
# examples, and provides methods to evaluate their performance in parallel.
|
12
|
+
# It is a nice tool to compare and evaluate the performance of different
|
13
|
+
# algorithms, the same algorithm with different parameters, or your own new
|
14
|
+
# algorithm against the classic classifiers.
|
15
|
+
class ClassifierEvaluator

  attr_reader :build_times, :eval_times, :classifiers

  # Creates an evaluator without classifiers. The timing readers start
  # as empty arrays, so they can be inspected before build/eval is called.
  def initialize
    @classifiers = []
    @build_times = []
    @eval_times = []
  end

  # Add a classifier instance to the test batch.
  # Returns self, so calls can be chained.
  def add_classifier(classifier)
    @classifiers << classifier
    self
  end

  alias :<< :add_classifier

  # Build all classifiers, using data examples found in data_set.
  # The last attribute of each item is considered as the
  # item class.
  # Building times are measured separately for each classifier, and
  # can be accessed through the build_times attribute reader.
  def build(data_set)
    @build_times = @classifiers.map do |classifier|
      Benchmark.measure { classifier.build data_set }
    end
    self
  end

  # You can evaluate new data, predicting its class with every
  # classifier. One result per classifier is returned, in the order
  # the classifiers were added.
  # e.g.
  #   evaluator.eval(['New York',  '<30', 'F'])
  #   => ['Y', 'Y', 'Y', 'N', 'Y', 'Y', 'N']
  # Evaluation times are measured separately for each classifier, and
  # can be accessed through the eval_times attribute reader.
  def eval(data)
    @eval_times = []
    results = []
    @classifiers.each do |classifier|
      @eval_times << Benchmark.measure { results << classifier.eval(data) }
    end
    results
  end

  # Test classifiers using a data set. The last attribute of each item
  # is considered as the expected class. Data items are evaluated
  # using all classifiers: evaluation times, success rate, and quantity of
  # classification errors are returned in a data set.
  # The return data set has a row for every classifier tested, and the
  # following attributes:
  #   ["Classifier", "Testing Time", "Errors", "Success rate"]
  def test(data_set)
    result_data_items = @classifiers.map do |classifier|
      test_classifier(classifier, data_set)
    end
    Ai4r::Data::DataSet.new(:data_items => result_data_items,
      :data_labels => ["Classifier","Testing Time","Errors","Success rate"])
  end

  private

  # Evaluates every item of data_set with the given classifier and
  # returns a row with the format:
  #   [classifier, elapsed real time, error count, success rate]
  # The success rate is NaN when data_set has no items (0 / 0.0).
  def test_classifier(classifier, data_set)
    data_set_size = data_set.data_items.length
    errors = 0
    testing_times = Benchmark.measure do
      data_set.data_items.each do |data_item|
        expected_result = data_item.last
        result = classifier.eval(data_item[0...-1])
        errors += 1 if result != expected_result
      end
    end
    success_rate = (data_set_size - errors) / data_set_size.to_f
    [classifier, testing_times.real, errors, success_rate]
  end

end
|
92
|
+
|
93
|
+
end
|
94
|
+
|
95
|
+
end
|
@@ -0,0 +1,270 @@
|
|
1
|
+
# Author:: Sergio Fierens
|
2
|
+
# License:: MPL 1.1
|
3
|
+
# Project:: ai4r
|
4
|
+
# Url:: http://ai4r.rubyforge.org/
|
5
|
+
#
|
6
|
+
# You can redistribute it and/or modify it under the terms of
|
7
|
+
# the Mozilla Public License version 1.1 as published by the
|
8
|
+
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
+
module Ai4r
|
10
|
+
|
11
|
+
# The GeneticAlgorithm module implements the GeneticSearch and Chromosome
|
12
|
+
# classes. The GeneticSearch is a generic class, and can be used to solve
|
13
|
+
# any kind of problems. The GeneticSearch class performs a stochastic search
|
14
|
+
# of the solution of a given problem.
|
15
|
+
#
|
16
|
+
# The Chromosome is "problem specific". Ai4r built-in Chromosome class was
|
17
|
+
# designed to model the Travelling salesman problem. If you want to solve other
|
18
|
+
# types of problems, you will have to modify the Chromosome class, by overwriting
|
19
|
+
# its fitness, reproduce, and mutate functions, to model your specific problem.
|
20
|
+
module GeneticAlgorithm
|
21
|
+
|
22
|
+
# This class is used to automatically:
|
23
|
+
#
|
24
|
+
# 1. Choose initial population
|
25
|
+
# 2. Evaluate the fitness of each individual in the population
|
26
|
+
# 3. Repeat
|
27
|
+
# 1. Select best-ranking individuals to reproduce
|
28
|
+
# 2. Breed new generation through crossover and mutation (genetic operations) and give birth to offspring
|
29
|
+
# 3. Evaluate the individual fitnesses of the offspring
|
30
|
+
# 4. Replace worst ranked part of population with offspring
|
31
|
+
# 4. Until termination
|
32
|
+
#
|
33
|
+
# If you want to customize the algorithm, you must modify any of the following classes:
|
34
|
+
# - Chromosome
|
35
|
+
# - Population
|
36
|
+
class GeneticSearch

  attr_accessor :population

  # initial_population_size:: number of chromosomes seeded by run
  # generations:: number of select/breed/replace iterations run performs
  def initialize(initial_population_size, generations)
    @population_size = initial_population_size
    @max_generation = generations
    @generation = 0
  end

  #     1. Choose initial population
  #     2. Evaluate the fitness of each individual in the population
  #     3. Repeat
  #           1. Select best-ranking individuals to reproduce
  #           2. Breed new generation through crossover and mutation (genetic operations) and give birth to offspring
  #           3. Evaluate the individual fitnesses of the offspring
  #           4. Replace worst ranked part of population with offspring
  #     4. Until termination
  #     5. Return the best chromosome
  def run
    generate_initial_population
    @max_generation.times do
      selected_to_breed = selection
      offsprings = reproduction(selected_to_breed)
      replace_worst_ranked(offsprings)
    end
    best_chromosome
  end

  # Replaces the current population with @population_size chromosomes
  # obtained from Chromosome.seed.
  def generate_initial_population
    @population = []
    @population_size.times { @population << Chromosome.seed }
  end

  # Select best-ranking individuals to reproduce
  #
  # Selection is the stage of a genetic algorithm in which individual
  # genomes are chosen from a population for later breeding.
  # There are several generic selection algorithms, such as
  # tournament selection and roulette wheel selection. We implemented the
  # latter.
  #
  # Steps:
  #
  # 1. The fitness function is evaluated for each individual, providing fitness values
  # 2. The population is sorted by descending fitness values.
  # 3. The fitness values are then normalized. (Highest fitness gets 1, lowest fitness gets 0). The normalized value is stored in the "normalized_fitness" attribute of the chromosomes.
  # 4. A random number R is chosen. R is between 0 and the accumulated normalized value (all the normalized fitness values added together).
  # 5. The selected individual is the first one whose accumulated normalized value (its normalized value plus the normalized values of the chromosomes prior to it) is greater than or equal to R.
  # 6. We repeat steps 4 and 5, 2/3 times the population size.
  #
  # When every chromosome has the same fitness, all of them get a
  # normalized fitness of 1 and are equally likely to be selected.
  def selection
    @population.sort! { |a, b| b.fitness <=> a.fitness }
    best_fitness = @population[0].fitness
    worst_fitness = @population.last.fitness
    fitness_range = best_fitness - worst_fitness
    acum_fitness = 0
    if fitness_range > 0
      @population.each do |chromosome|
        # to_f keeps integer fitness values from collapsing to 0 or 1
        # through integer division.
        chromosome.normalized_fitness =
          (chromosome.fitness - worst_fitness).to_f / fitness_range
        acum_fitness += chromosome.normalized_fitness
      end
    else
      @population.each do |chromosome|
        chromosome.normalized_fitness = 1
        # The accumulated value must track the assigned values, or the
        # roulette degenerates into always picking the first chromosome.
        acum_fitness += 1
      end
    end
    Array.new((2 * @population_size) / 3) do
      select_random_individual(acum_fitness)
    end
  end

  # We combine each pair of selected chromosome using the method
  # Chromosome.reproduce
  #
  # The reproduction will also call the Chromosome.mutate method with
  # each member of the population. You should implement Chromosome.mutate
  # to only change (mutate) randomly. E.g. You could effectively change the
  # chromosome only if
  #     rand < ((1 - chromosome.normalized_fitness) * 0.4)
  def reproduction(selected_to_breed)
    offsprings = []
    # Consecutive pairs breed; an unpaired last individual is ignored.
    (selected_to_breed.length / 2).times do |i|
      offsprings << Chromosome.reproduce(selected_to_breed[2 * i],
                                         selected_to_breed[2 * i + 1])
    end
    @population.each { |individual| Chromosome.mutate(individual) }
    offsprings
  end

  # Replace worst ranked part of population with offspring.
  # Relies on the population order left by selection: the last
  # offsprings.length individuals are dropped.
  def replace_worst_ranked(offsprings)
    size = offsprings.length
    @population = @population[0..(-size - 1)] + offsprings
  end

  # Select the best chromosome in the population (the first one found
  # with the highest fitness). Returns nil for an empty population.
  def best_chromosome
    @population.inject do |the_best, chromosome|
      chromosome.fitness > the_best.fitness ? chromosome : the_best
    end
  end

  private

  # Roulette wheel: returns the first chromosome whose accumulated
  # normalized fitness reaches a random target between 0 and acum_fitness.
  def select_random_individual(acum_fitness)
    select_random_target = acum_fitness * rand
    local_acum = 0
    @population.each do |chromosome|
      local_acum += chromosome.normalized_fitness
      return chromosome if local_acum >= select_random_target
    end
    # Fallback so a chromosome (never the population array) is returned
    # even if the target was not reached.
    @population.last
  end

end
|
155
|
+
|
156
|
+
# A Chromosome is a representation of an individual solution for a specific
|
157
|
+
# problem. You will have to redefine the Chromosome representation for each
|
158
|
+
# particular problem, along with its fitness, mutate, reproduce, and seed
|
159
|
+
# methods.
|
160
|
+
class Chromosome

  attr_reader :data
  attr_accessor :normalized_fitness

  # data is the sequence of node indexes (a route through the nodes of
  # the cost matrix set with Chromosome.set_cost_matrix).
  def initialize(data)
    @data = data
  end

  # Replaces the chromosome data and discards the cached fitness, so the
  # next call to fitness reflects the new data.
  def data=(new_data)
    @data = new_data
    @fitness = nil
  end

  # The fitness method quantifies the optimality of a solution
  # (that is, a chromosome) in a genetic algorithm so that that particular
  # chromosome may be ranked against all the other chromosomes.
  #
  # Optimal chromosomes, or at least chromosomes which are more optimal,
  # are allowed to breed and mix their datasets by any of several techniques,
  # producing a new generation that will (hopefully) be even better.
  #
  # In this implementation the fitness is the negated sum of the costs
  # between consecutive nodes. The value is cached until the data is
  # replaced through data=.
  def fitness
    return @fitness if @fitness
    cost = 0
    @data.each_cons(2) do |from_token, to_token|
      cost += @@costs[from_token][to_token]
    end
    @fitness = -1 * cost
  end

  # mutation method is used to maintain genetic diversity from one
  # generation of a population of chromosomes to the next. It is analogous
  # to biological mutation.
  #
  # The purpose of mutation in GAs is to allow the
  # algorithm to avoid local minima by preventing the population of
  # chromosomes from becoming too similar to each other, thus slowing or even
  # stopping evolution.
  #
  # Calling the mutate function will "probably" slightly change a chromosome
  # randomly.
  #
  # This implementation of "mutation" will (probably) reverse the
  # order of 2 consecutive random nodes
  # (e.g. from [ 0, 1, 2, 4] to [0, 2, 1, 4]) if:
  #     rand < ((1 - chromosome.normalized_fitness) * 0.3)
  #
  # Chromosomes without a normalized fitness, or with less than two
  # nodes, are left untouched.
  def self.mutate(chromosome)
    normalized = chromosome.normalized_fitness
    return unless normalized && rand < ((1 - normalized) * 0.3)
    data = chromosome.data
    # Nothing to swap; rand(0) would also yield a float, not an index.
    return if data.length < 2
    index = rand(data.length - 1)
    data[index], data[index + 1] = data[index + 1], data[index]
    # The writer also clears the chromosome's cached fitness.
    chromosome.data = data
    nil
  end

  # Reproduction method is used to combine two chromosomes (solutions) into
  # a single new chromosome. There are several ways to
  # combine two chromosomes: One-point crossover, Two-point crossover,
  # "Cut and splice", edge recombination, and more.
  #
  # The method is usually dependent on the problem domain.
  # In this case, we have implemented edge recombination, which is the
  # most used reproduction algorithm for the Travelling salesman problem.
  def self.reproduce(a, b)
    available = (0...@@costs[0].length).to_a
    token = a.data[0]
    spawn = [token]
    available.delete(token)
    until available.empty?
      # Select next: prefer the node following the current one in b,
      # then in a, otherwise any node still available.
      if token != b.data.last && available.include?(b.data[b.data.index(token) + 1])
        next_token = b.data[b.data.index(token) + 1]
      elsif token != a.data.last && available.include?(a.data[a.data.index(token) + 1])
        next_token = a.data[a.data.index(token) + 1]
      else
        next_token = available[rand(available.length)]
      end
      # Add to spawn
      token = next_token
      available.delete(token)
      spawn << next_token
      # Randomly swap the parents' roles for the next step.
      a, b = b, a if rand < 0.4
    end
    Chromosome.new(spawn)
  end

  # Initializes an individual solution (chromosome) for the initial
  # population. Usually the chromosome is generated randomly, but you can
  # use some problem domain knowledge, to generate a
  # (probably) better initial solution.
  #
  # This implementation returns a random ordering of all the node indexes
  # of the cost matrix.
  def self.seed
    available = (0...@@costs[0].length).to_a
    seed = []
    seed << available.delete_at(rand(available.length)) until available.empty?
    Chromosome.new(seed)
  end

  # Sets the cost matrix shared by all chromosomes: costs[i][j] is read
  # as the cost of going from node i to node j.
  def self.set_cost_matrix(costs)
    @@costs = costs
  end
end
|
267
|
+
|
268
|
+
end
|
269
|
+
|
270
|
+
end
|