ai4ruby 1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. data/README.rdoc +47 -0
  2. data/examples/classifiers/id3_data.csv +121 -0
  3. data/examples/classifiers/id3_example.rb +29 -0
  4. data/examples/classifiers/naive_bayes_data.csv +11 -0
  5. data/examples/classifiers/naive_bayes_example.rb +16 -0
  6. data/examples/classifiers/results.txt +31 -0
  7. data/examples/genetic_algorithm/genetic_algorithm_example.rb +37 -0
  8. data/examples/genetic_algorithm/travel_cost.csv +16 -0
  9. data/examples/neural_network/backpropagation_example.rb +67 -0
  10. data/examples/neural_network/patterns_with_base_noise.rb +68 -0
  11. data/examples/neural_network/patterns_with_noise.rb +66 -0
  12. data/examples/neural_network/training_patterns.rb +68 -0
  13. data/examples/neural_network/xor_example.rb +35 -0
  14. data/examples/som/som_data.rb +156 -0
  15. data/examples/som/som_multi_node_example.rb +22 -0
  16. data/examples/som/som_single_example.rb +24 -0
  17. data/lib/ai4r.rb +33 -0
  18. data/lib/ai4r/classifiers/classifier.rb +62 -0
  19. data/lib/ai4r/classifiers/hyperpipes.rb +118 -0
  20. data/lib/ai4r/classifiers/ib1.rb +121 -0
  21. data/lib/ai4r/classifiers/id3.rb +326 -0
  22. data/lib/ai4r/classifiers/multilayer_perceptron.rb +135 -0
  23. data/lib/ai4r/classifiers/naive_bayes.rb +259 -0
  24. data/lib/ai4r/classifiers/one_r.rb +110 -0
  25. data/lib/ai4r/classifiers/prism.rb +197 -0
  26. data/lib/ai4r/classifiers/zero_r.rb +73 -0
  27. data/lib/ai4r/clusterers/average_linkage.rb +59 -0
  28. data/lib/ai4r/clusterers/bisecting_k_means.rb +93 -0
  29. data/lib/ai4r/clusterers/centroid_linkage.rb +66 -0
  30. data/lib/ai4r/clusterers/clusterer.rb +61 -0
  31. data/lib/ai4r/clusterers/complete_linkage.rb +67 -0
  32. data/lib/ai4r/clusterers/diana.rb +139 -0
  33. data/lib/ai4r/clusterers/k_means.rb +126 -0
  34. data/lib/ai4r/clusterers/median_linkage.rb +61 -0
  35. data/lib/ai4r/clusterers/single_linkage.rb +194 -0
  36. data/lib/ai4r/clusterers/ward_linkage.rb +64 -0
  37. data/lib/ai4r/clusterers/ward_linkage_hierarchical.rb +31 -0
  38. data/lib/ai4r/clusterers/weighted_average_linkage.rb +61 -0
  39. data/lib/ai4r/data/data_set.rb +266 -0
  40. data/lib/ai4r/data/parameterizable.rb +64 -0
  41. data/lib/ai4r/data/proximity.rb +100 -0
  42. data/lib/ai4r/data/statistics.rb +77 -0
  43. data/lib/ai4r/experiment/classifier_evaluator.rb +95 -0
  44. data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +270 -0
  45. data/lib/ai4r/neural_network/backpropagation.rb +326 -0
  46. data/lib/ai4r/neural_network/hopfield.rb +149 -0
  47. data/lib/ai4r/som/layer.rb +68 -0
  48. data/lib/ai4r/som/node.rb +96 -0
  49. data/lib/ai4r/som/som.rb +155 -0
  50. data/lib/ai4r/som/two_phase_layer.rb +90 -0
  51. data/test/classifiers/hyperpipes_test.rb +84 -0
  52. data/test/classifiers/ib1_test.rb +78 -0
  53. data/test/classifiers/id3_test.rb +208 -0
  54. data/test/classifiers/multilayer_perceptron_test.rb +79 -0
  55. data/test/classifiers/naive_bayes_test.rb +43 -0
  56. data/test/classifiers/one_r_test.rb +62 -0
  57. data/test/classifiers/prism_test.rb +85 -0
  58. data/test/classifiers/zero_r_test.rb +49 -0
  59. data/test/clusterers/average_linkage_test.rb +51 -0
  60. data/test/clusterers/bisecting_k_means_test.rb +66 -0
  61. data/test/clusterers/centroid_linkage_test.rb +53 -0
  62. data/test/clusterers/complete_linkage_test.rb +57 -0
  63. data/test/clusterers/diana_test.rb +69 -0
  64. data/test/clusterers/k_means_test.rb +100 -0
  65. data/test/clusterers/median_linkage_test.rb +53 -0
  66. data/test/clusterers/single_linkage_test.rb +122 -0
  67. data/test/clusterers/ward_linkage_hierarchical_test.rb +61 -0
  68. data/test/clusterers/ward_linkage_test.rb +53 -0
  69. data/test/clusterers/weighted_average_linkage_test.rb +53 -0
  70. data/test/data/data_set_test.rb +96 -0
  71. data/test/data/proximity_test.rb +81 -0
  72. data/test/data/statistics_test.rb +65 -0
  73. data/test/experiment/classifier_evaluator_test.rb +76 -0
  74. data/test/genetic_algorithm/chromosome_test.rb +58 -0
  75. data/test/genetic_algorithm/genetic_algorithm_test.rb +81 -0
  76. data/test/neural_network/backpropagation_test.rb +82 -0
  77. data/test/neural_network/hopfield_test.rb +72 -0
  78. data/test/som/som_test.rb +97 -0
  79. metadata +168 -0
@@ -0,0 +1,64 @@
+ # Author:: Sergio Fierens
+ # License:: MPL 1.1
+ # Project:: ai4r
+ # Url:: http://ai4r.rubyforge.org/
+ #
+ # You can redistribute it and/or modify it under the terms of
+ # the Mozilla Public License version 1.1 as published by the
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
+
+ module Ai4r
+   module Data
+     module Parameterizable
+
+       module ClassMethods
+
+         # Get info on what can be parameterized on this algorithm.
+         # It returns a hash with the following format:
+         #   { :param_name => "Info on the parameter" }
+         def get_parameters_info
+           return @_params_info_ || {}
+         end
+
+         # Set info on what can be parameterized on this algorithm.
+         # You must provide a hash with the following format:
+         #   { :param_name => "Info on the parameter" }
+         def parameters_info(params_info)
+           @_params_info_ = params_info
+           params_info.keys.each do |param|
+             attr_accessor param
+           end
+         end
+       end
+
+       # Set parameter values on this algorithm instance.
+       # You must provide a hash with the following format:
+       #   { :param_name => parameter_value }
+       def set_parameters(params)
+         self.class.get_parameters_info.keys.each do |key|
+           if self.respond_to?("#{key}=".to_sym)
+             send("#{key}=".to_sym, params[key]) if params.has_key? key
+           end
+         end
+         return self
+       end
+
+       # Get parameter values on this algorithm instance.
+       # Returns a hash with the following format:
+       #   { :param_name => parameter_value }
+       def get_parameters
+         params = {}
+         self.class.get_parameters_info.keys.each do |key|
+           params[key] = send(key) if self.respond_to?(key)
+         end
+         return params
+       end
+
+       def self.included(base)
+         base.extend(ClassMethods)
+       end
+
+     end
+   end
+ end
+
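Usage note: a class that includes Ai4r::Data::Parameterizable gains parameters_info at the class level and set_parameters/get_parameters on instances. Below is a minimal sketch of that API (not shipped in this release), assuming lib/ai4r.rb puts the module on the load path; SampleAlgorithm and its :iterations parameter are made up for illustration.

  require 'ai4r'

  # Hypothetical class, used only to illustrate the Parameterizable API.
  class SampleAlgorithm
    include Ai4r::Data::Parameterizable

    # Declares an attr_accessor for each parameter, plus a documentation string.
    parameters_info :iterations => "Number of iterations to run."
  end

  algorithm = SampleAlgorithm.new
  algorithm.set_parameters(:iterations => 100)  # calls the generated writer
  algorithm.get_parameters                      # => { :iterations => 100 }
  SampleAlgorithm.get_parameters_info           # => { :iterations => "Number of iterations to run." }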
@@ -0,0 +1,100 @@
+ # Author:: Sergio Fierens
+ # License:: MPL 1.1
+ # Project:: ai4r
+ # Url:: http://ai4r.rubyforge.org/
+ #
+ # You can redistribute it and/or modify it under the terms of
+ # the Mozilla Public License version 1.1 as published by the
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
+
+ module Ai4r
+   module Data
+
+     # This module provides classical distance functions
+     module Proximity
+
+       # This is a faster computational replacement for euclidean distance.
+       # Parameters a and b are vectors with continuous attributes.
+       def self.squared_euclidean_distance(a, b)
+         sum = 0.0
+         a.each_with_index do |item_a, i|
+           item_b = b[i]
+           sum += (item_a - item_b)**2
+         end
+         return sum
+       end
+
+       # Euclidean distance, or L2 norm.
+       # Parameters a and b are vectors with continuous attributes.
+       # Euclidean distance tends to form hyperspherical
+       # clusters (Clustering, Xu and Wunsch, 2009).
+       # Translations and rotations do not cause a
+       # distortion in distance relations (Duda et al, 2001).
+       # If attributes are measured with different units,
+       # attributes with larger values and variance will
+       # dominate the metric.
+       def self.euclidean_distance(a, b)
+         Math.sqrt(squared_euclidean_distance(a, b))
+       end
+
+
+       # City block, Manhattan distance, or L1 norm.
+       # Parameters a and b are vectors with continuous attributes.
+       def self.manhattan_distance(a, b)
+         sum = 0.0
+         a.each_with_index do |item_a, i|
+           item_b = b[i]
+           sum += (item_a - item_b).abs
+         end
+         return sum
+       end
+
+       # Sup distance, or L-infinity norm.
+       # Parameters a and b are vectors with continuous attributes.
+       def self.sup_distance(a, b)
+         distance = 0.0
+         a.each_with_index do |item_a, i|
+           item_b = b[i]
+           diff = (item_a - item_b).abs
+           distance = diff if diff > distance
+         end
+         return distance
+       end
+
+       # The Hamming distance between two attribute vectors of equal
+       # length is the number of attributes for which the corresponding
+       # values are different.
+       # This distance function is frequently used with binary attributes,
+       # though it can be used with other discrete attributes.
+       def self.hamming_distance(a, b)
+         count = 0
+         a.each_index do |i|
+           count += 1 if a[i] != b[i]
+         end
+         return count
+       end
+
+       # The "simple matching" distance between two attribute sets is given
+       # by the number of values present in both vectors.
+       # If sets a and b have lengths da and db then:
+       #
+       #   S = 2/(da + db) * Number of values present in both sets
+       #   D = 1.0/S - 1
+       #
+       # Some considerations:
+       # * a and b must not include repeated items
+       # * all attributes are treated equally
+       def self.simple_matching_distance(a, b)
+         similarity = 0.0
+         a.each { |item| similarity += 2 if b.include?(item) }
+         similarity /= (a.length + b.length)
+         return 1.0 / similarity - 1
+       end
+
+     end
+
+   end
+
+ end
+
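Usage note: the Proximity functions above are plain module methods over vectors of equal length (continuous attributes, or discrete ones for hamming_distance). A minimal sketch (not shipped in this release), assuming lib/ai4r.rb requires data/proximity:

  require 'ai4r'

  a = [1.0, 2.0, 3.0]
  b = [2.0, 4.0, 6.0]

  Ai4r::Data::Proximity.squared_euclidean_distance(a, b)        # => 14.0
  Ai4r::Data::Proximity.euclidean_distance(a, b)                # => 3.7416... (square root of 14)
  Ai4r::Data::Proximity.manhattan_distance(a, b)                # => 6.0
  Ai4r::Data::Proximity.sup_distance(a, b)                      # => 3.0
  Ai4r::Data::Proximity.hamming_distance([0, 1, 1], [1, 1, 0])  # => 2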
@@ -0,0 +1,77 @@
+ # Author:: Sergio Fierens
+ # License:: MPL 1.1
+ # Project:: ai4r
+ # Url:: http://ai4r.rubyforge.org/
+ #
+ # You can redistribute it and/or modify it under the terms of
+ # the Mozilla Public License version 1.1 as published by the
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
+
+ require File.dirname(__FILE__) + '/data_set'
+
+ module Ai4r
+   module Data
+
+     # This module provides some basic statistics functions to operate on
+     # data set attributes.
+     module Statistics
+
+       # Get the sample mean
+       def self.mean(data_set, attribute)
+         index = data_set.get_index(attribute)
+         sum = 0.0
+         data_set.data_items.each { |item| sum += item[index] }
+         return sum / data_set.data_items.length
+       end
+
+       # Get the variance.
+       # You can provide the mean if you have it already, to speed things up.
+       def self.variance(data_set, attribute, mean = nil)
+         index = data_set.get_index(attribute)
+         mean ||= mean(data_set, attribute)
+         sum = 0.0
+         data_set.data_items.each { |item| sum += (item[index] - mean)**2 }
+         return sum / (data_set.data_items.length - 1)
+       end
+
+       # Get the standard deviation.
+       # You can provide the variance if you have it already, to speed things up.
+       def self.standard_deviation(data_set, attribute, variance = nil)
+         variance ||= variance(data_set, attribute)
+         Math.sqrt(variance)
+       end
+
+       # Get the sample mode.
+       def self.mode(data_set, attribute)
+         index = data_set.get_index(attribute)
+         count = Hash.new { 0 }
+         max_count = 0
+         mode = nil
+         data_set.data_items.each do |data_item|
+           attr_value = data_item[index]
+           attr_count = (count[attr_value] += 1)
+           if attr_count > max_count
+             mode = attr_value
+             max_count = attr_count
+           end
+         end
+         return mode
+       end
+
+       # Get the maximum value of an attribute in the data set
+       def self.max(data_set, attribute)
+         index = data_set.get_index(attribute)
+         item = data_set.data_items.max { |x, y| x[index] <=> y[index] }
+         return (item) ? item[index] : (-1.0 / 0)
+       end
+
+       # Get the minimum value of an attribute in the data set
+       def self.min(data_set, attribute)
+         index = data_set.get_index(attribute)
+         item = data_set.data_items.min { |x, y| x[index] <=> y[index] }
+         return (item) ? item[index] : (1.0 / 0)
+       end
+
+     end
+
+   end
+ end
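Usage note: the Statistics functions above take a DataSet and an attribute, resolved through DataSet#get_index. A minimal sketch (not shipped in this release), assuming lib/ai4r.rb requires data/statistics and that get_index accepts the attribute label used below; the 'height' data is made up for illustration:

  require 'ai4r'

  data_set = Ai4r::Data::DataSet.new(
    :data_items  => [[2.0], [4.0], [4.0], [6.0]],
    :data_labels => ['height'])

  Ai4r::Data::Statistics.mean(data_set, 'height')                # => 4.0
  Ai4r::Data::Statistics.variance(data_set, 'height')            # => 2.666... (sample variance, n-1 denominator)
  Ai4r::Data::Statistics.standard_deviation(data_set, 'height')  # => 1.63...
  Ai4r::Data::Statistics.mode(data_set, 'height')                # => 4.0
  Ai4r::Data::Statistics.max(data_set, 'height')                 # => 6.0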
@@ -0,0 +1,95 @@
+ require 'benchmark'
+ require File.dirname(__FILE__) + '/../data/data_set'
+
+
+ module Ai4r
+
+   module Experiment
+
+     # The ClassifierEvaluator is useful to compare different classifier
+     # algorithms. The evaluator builds the classifiers using the same data
+     # examples, and provides methods to evaluate their performance in parallel.
+     # It is a nice tool to compare and evaluate the performance of different
+     # algorithms, the same algorithm with different parameters, or your own new
+     # algorithm against the classic classifiers.
+     class ClassifierEvaluator
+
+       attr_reader :build_times, :eval_times, :classifiers
+
+       def initialize
+         @classifiers = []
+       end
+
+       # Add a classifier instance to the test batch
+       def add_classifier(classifier)
+         @classifiers << classifier
+         return self
+       end
+
+       alias :<< :add_classifier
+
+       # Build all classifiers, using data examples found in data_set.
+       # The last attribute of each item is considered as the
+       # item class.
+       # Building times are measured separately, and can be accessed
+       # through the build_times attribute reader.
+       def build(data_set)
+         @build_times = []
+         @classifiers.each do |classifier|
+           @build_times << Benchmark.measure { classifier.build data_set }
+         end
+         return self
+       end
+
+       # You can evaluate new data, predicting its class.
+       # e.g.
+       #   evaluator.eval(['New York', '<30', 'F'])
+       #     => ['Y', 'Y', 'Y', 'N', 'Y', 'Y', 'N']
+       # Evaluation times are measured separately, and can be accessed
+       # through the eval_times attribute reader.
+       def eval(data)
+         @eval_times = []
+         results = []
+         @classifiers.each do |classifier|
+           @eval_times << Benchmark.measure { results << classifier.eval(data) }
+         end
+         return results
+       end
+
+       # Test classifiers using a data set. The last attribute of each item
+       # is considered as the expected class. Data items are evaluated
+       # using all classifiers: evaluation times, success rate, and quantity of
+       # classification errors are returned in a data set.
+       # The returned data set has a row for every classifier tested, and the
+       # following attributes:
+       #   ["Classifier", "Testing Time", "Errors", "Success rate"]
+       def test(data_set)
+         result_data_items = []
+         @classifiers.each do |classifier|
+           result_data_items << test_classifier(classifier, data_set)
+         end
+         return Ai4r::Data::DataSet.new(:data_items => result_data_items,
+           :data_labels => ["Classifier", "Testing Time", "Errors", "Success rate"])
+       end
+
+       private
+       def test_classifier(classifier, data_set)
+         data_set_size = data_set.data_items.length
+         errors = 0
+         testing_times = Benchmark.measure do
+           data_set.data_items.each do |data_item|
+             data = data_item[0...-1]
+             expected_result = data_item.last
+             result = classifier.eval data
+             errors += 1 if result != expected_result
+           end
+         end
+         return [classifier, testing_times.real, errors,
+           ((data_set_size - errors * 1.0) / data_set_size)]
+       end
+
+     end
+
+   end
+
+ end
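Usage note: the evaluator is fed classifier instances, built once on a shared data set, and then queried or tested in batch. A minimal sketch (not shipped in this release), assuming lib/ai4r.rb requires the evaluator and the ZeroR/OneR classifiers; the data items and labels are made up for illustration:

  require 'ai4r'

  data_set = Ai4r::Data::DataSet.new(
    :data_items  => [['New York', '<30', 'M', 'Y'],
                     ['Chicago',  '>40', 'F', 'N'],
                     ['New York', '<30', 'F', 'Y']],
    :data_labels => ['city', 'age_range', 'gender', 'marketing_target'])

  evaluator = Ai4r::Experiment::ClassifierEvaluator.new
  evaluator << Ai4r::Classifiers::ZeroR.new
  evaluator << Ai4r::Classifiers::OneR.new
  evaluator.build(data_set)                 # build times collected in evaluator.build_times

  evaluator.eval(['New York', '<30', 'F'])  # one prediction per classifier, e.g. ['Y', 'Y']
  evaluator.test(data_set)                  # DataSet with testing time, errors and success rate per classifier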
@@ -0,0 +1,270 @@
+ # Author:: Sergio Fierens
+ # License:: MPL 1.1
+ # Project:: ai4r
+ # Url:: http://ai4r.rubyforge.org/
+ #
+ # You can redistribute it and/or modify it under the terms of
+ # the Mozilla Public License version 1.1 as published by the
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
+ module Ai4r
+
+   # The GeneticAlgorithm module implements the GeneticSearch and Chromosome
+   # classes. The GeneticSearch is a generic class, and can be used to solve
+   # any kind of problem. The GeneticSearch class performs a stochastic search
+   # for the solution of a given problem.
+   #
+   # The Chromosome is "problem specific". Ai4r's built-in Chromosome class was
+   # designed to model the Travelling salesman problem. If you want to solve another
+   # type of problem, you will have to modify the Chromosome class, overriding
+   # its fitness, reproduce, and mutate methods, to model your specific problem.
+   module GeneticAlgorithm
+
+     # This class is used to automatically:
+     #
+     # 1. Choose an initial population
+     # 2. Evaluate the fitness of each individual in the population
+     # 3. Repeat
+     #      1. Select best-ranking individuals to reproduce
+     #      2. Breed a new generation through crossover and mutation (genetic operations) and give birth to offspring
+     #      3. Evaluate the individual fitnesses of the offspring
+     #      4. Replace the worst ranked part of the population with the offspring
+     # 4. Until termination
+     #
+     # If you want to customize the algorithm, you must modify any of the following classes:
+     #   - Chromosome
+     #   - Population
+     class GeneticSearch
+
+       attr_accessor :population
+
+
+       def initialize(initial_population_size, generations)
+         @population_size = initial_population_size
+         @max_generation = generations
+         @generation = 0
+       end
+
+       # 1. Choose an initial population
+       # 2. Evaluate the fitness of each individual in the population
+       # 3. Repeat
+       #      1. Select best-ranking individuals to reproduce
+       #      2. Breed a new generation through crossover and mutation (genetic operations) and give birth to offspring
+       #      3. Evaluate the individual fitnesses of the offspring
+       #      4. Replace the worst ranked part of the population with the offspring
+       # 4. Until termination
+       # 5. Return the best chromosome
+       def run
+         generate_initial_population  # Generate initial population
+         @max_generation.times do
+           selected_to_breed = selection  # Evaluates current population
+           offsprings = reproduction selected_to_breed  # Generate the population for this new generation
+           replace_worst_ranked offsprings
+         end
+         return best_chromosome
+       end
+
+
+       def generate_initial_population
+         @population = []
+         @population_size.times do
+           population << Chromosome.seed
+         end
+       end
+
+       # Select best-ranking individuals to reproduce.
+       #
+       # Selection is the stage of a genetic algorithm in which individual
+       # genomes are chosen from a population for later breeding.
+       # There are several generic selection algorithms, such as
+       # tournament selection and roulette wheel selection. We implemented the
+       # latter.
+       #
+       # Steps:
+       #
+       # 1. The fitness function is evaluated for each individual, providing fitness values.
+       # 2. The population is sorted by descending fitness values.
+       # 3. The fitness values are then normalized (highest fitness gets 1, lowest fitness gets 0). The normalized value is stored in the "normalized_fitness" attribute of the chromosomes.
+       # 4. A random number R is chosen. R is between 0 and the accumulated normalized value (all the normalized fitness values added together).
+       # 5. The selected individual is the first one whose accumulated normalized value (its normalized value plus the normalized values of the chromosomes before it) is greater than R.
+       # 6. We repeat steps 4 and 5, 2/3 times the population size.
+       def selection
+         @population.sort! { |a, b| b.fitness <=> a.fitness }
+         best_fitness = @population[0].fitness
+         worst_fitness = @population.last.fitness
+         acum_fitness = 0
+         if best_fitness - worst_fitness > 0
+           @population.each do |chromosome|
+             chromosome.normalized_fitness = (chromosome.fitness - worst_fitness) / (best_fitness - worst_fitness)
+             acum_fitness += chromosome.normalized_fitness
+           end
+         else
+           @population.each { |chromosome| chromosome.normalized_fitness = 1 }
+         end
+         selected_to_breed = []
+         ((2 * @population_size) / 3).times do
+           selected_to_breed << select_random_individual(acum_fitness)
+         end
+         selected_to_breed
+       end
+
+       # We combine each pair of selected chromosomes using the method
+       # Chromosome.reproduce.
+       #
+       # The reproduction will also call the Chromosome.mutate method with
+       # each member of the population. You should implement Chromosome.mutate
+       # so that it only changes (mutates) the chromosome randomly. E.g. you
+       # could effectively change the chromosome only if
+       #   rand < ((1 - chromosome.normalized_fitness) * 0.4)
+       def reproduction(selected_to_breed)
+         offsprings = []
+         0.upto(selected_to_breed.length / 2 - 1) do |i|
+           offsprings << Chromosome.reproduce(selected_to_breed[2 * i], selected_to_breed[2 * i + 1])
+         end
+         @population.each do |individual|
+           Chromosome.mutate(individual)
+         end
+         return offsprings
+       end
+
+       # Replace the worst ranked part of the population with the offspring
+       def replace_worst_ranked(offsprings)
+         size = offsprings.length
+         @population = @population[0..((-1 * size) - 1)] + offsprings
+       end
+
+       # Select the best chromosome in the population
+       def best_chromosome
+         the_best = @population[0]
+         @population.each do |chromosome|
+           the_best = chromosome if chromosome.fitness > the_best.fitness
+         end
+         return the_best
+       end
+
+       private
+       def select_random_individual(acum_fitness)
+         select_random_target = acum_fitness * rand
+         local_acum = 0
+         @population.each do |chromosome|
+           local_acum += chromosome.normalized_fitness
+           return chromosome if local_acum >= select_random_target
+         end
+       end
+
+     end
+
+     # A Chromosome is a representation of an individual solution for a specific
+     # problem. You will have to redefine the Chromosome representation for each
+     # particular problem, along with its fitness, mutate, reproduce, and seed
+     # methods.
+     class Chromosome
+
+       attr_accessor :data
+       attr_accessor :normalized_fitness
+
+       def initialize(data)
+         @data = data
+       end
+
+       # The fitness method quantifies the optimality of a solution
+       # (that is, a chromosome) in a genetic algorithm so that that particular
+       # chromosome may be ranked against all the other chromosomes.
+       #
+       # Optimal chromosomes, or at least chromosomes which are more optimal,
+       # are allowed to breed and mix their datasets by any of several techniques,
+       # producing a new generation that will (hopefully) be even better.
+       def fitness
+         return @fitness if @fitness
+         last_token = @data[0]
+         cost = 0
+         @data[1..-1].each do |token|
+           cost += @@costs[last_token][token]
+           last_token = token
+         end
+         @fitness = -1 * cost
+         return @fitness
+       end
+
+       # The mutate method is used to maintain genetic diversity from one
+       # generation of a population of chromosomes to the next. It is analogous
+       # to biological mutation.
+       #
+       # The purpose of mutation in GAs is to allow the
+       # algorithm to avoid local minima by preventing the population of
+       # chromosomes from becoming too similar to each other, thus slowing or even
+       # stopping evolution.
+       #
+       # Calling the mutate function will "probably" slightly change a chromosome
+       # randomly.
+       #
+       # This implementation of "mutation" will (probably) reverse the
+       # order of 2 consecutive random nodes
+       # (e.g. from [0, 1, 2, 4] to [0, 2, 1, 4]) if:
+       #   rand < ((1 - chromosome.normalized_fitness) * 0.3)
+       def self.mutate(chromosome)
+         if chromosome.normalized_fitness && rand < ((1 - chromosome.normalized_fitness) * 0.3)
+           data = chromosome.data
+           index = rand(data.length - 1)
+           data[index], data[index + 1] = data[index + 1], data[index]
+           chromosome.data = data
+           chromosome.instance_variable_set(:@fitness, nil)  # discard the memoized fitness of the mutated chromosome
+         end
+       end
+
+       # The reproduce method is used to combine two chromosomes (solutions) into
+       # a single new chromosome. There are several ways to
+       # combine two chromosomes: one-point crossover, two-point crossover,
+       # "cut and splice", edge recombination, and more.
+       #
+       # The method is usually dependent on the problem domain.
+       # In this case, we have implemented edge recombination, which is the
+       # most used reproduction algorithm for the Travelling salesman problem.
+       def self.reproduce(a, b)
+         data_size = @@costs[0].length
+         available = []
+         0.upto(data_size - 1) { |n| available << n }
+         token = a.data[0]
+         spawn = [token]
+         available.delete(token)
+         while available.length > 0 do
+           # Select next
+           if token != b.data.last && available.include?(b.data[b.data.index(token) + 1])
+             next_token = b.data[b.data.index(token) + 1]
+           elsif token != a.data.last && available.include?(a.data[a.data.index(token) + 1])
+             next_token = a.data[a.data.index(token) + 1]
+           else
+             next_token = available[rand(available.length)]
+           end
+           # Add to spawn
+           token = next_token
+           available.delete(token)
+           spawn << next_token
+           a, b = b, a if rand < 0.4
+         end
+         return Chromosome.new(spawn)
+       end
+
+       # Initializes an individual solution (chromosome) for the initial
+       # population. Usually the chromosome is generated randomly, but you can
+       # use some problem domain knowledge to generate a
+       # (probably) better initial solution.
+       def self.seed
+         data_size = @@costs[0].length
+         available = []
+         0.upto(data_size - 1) { |n| available << n }
+         seed = []
+         while available.length > 0 do
+           index = rand(available.length)
+           seed << available.delete_at(index)
+         end
+         return Chromosome.new(seed)
+       end
+
+       def self.set_cost_matrix(costs)
+         @@costs = costs
+       end
+     end
+
+   end
+
+ end
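Usage note: the built-in Chromosome models the Travelling salesman problem over a cost matrix, so a search is set up by loading the matrix and running GeneticSearch. A minimal sketch (not shipped in this release); the 4x4 cost matrix and the population/generation sizes are made up for illustration:

  require 'ai4r'

  # Cost of travelling from city i (row) to city j (column).
  costs = [[ 0, 10, 15, 20],
           [10,  0, 35, 25],
           [15, 35,  0, 30],
           [20, 25, 30,  0]]

  Ai4r::GeneticAlgorithm::Chromosome.set_cost_matrix(costs)

  search = Ai4r::GeneticAlgorithm::GeneticSearch.new(20, 100)  # 20 individuals, 100 generations
  best = search.run
  best.data      # => a permutation of [0, 1, 2, 3], e.g. [2, 0, 1, 3]
  best.fitness   # => negative total cost of the tour (higher is better)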