nirvdrum-ai4r 1.9.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (150) hide show
  1. data/.gitignore +1 -0
  2. data/.rakeTasks +7 -0
  3. data/README.rdoc +56 -0
  4. data/Rakefile.rb +42 -0
  5. data/VERSION +1 -0
  6. data/ai4r.gemspec +221 -0
  7. data/change_log +49 -0
  8. data/examples/classifiers/id3_data.csv +121 -0
  9. data/examples/classifiers/id3_example.rb +29 -0
  10. data/examples/classifiers/naive_bayes_data.csv +11 -0
  11. data/examples/classifiers/naive_bayes_example.rb +16 -0
  12. data/examples/classifiers/results.txt +31 -0
  13. data/examples/genetic_algorithm/genetic_algorithm_example.rb +37 -0
  14. data/examples/genetic_algorithm/travel_cost.csv +16 -0
  15. data/examples/neural_network/backpropagation_example.rb +67 -0
  16. data/examples/neural_network/patterns_with_base_noise.rb +68 -0
  17. data/examples/neural_network/patterns_with_noise.rb +66 -0
  18. data/examples/neural_network/training_patterns.rb +68 -0
  19. data/examples/neural_network/xor_example.rb +35 -0
  20. data/examples/som/som_data.rb +156 -0
  21. data/examples/som/som_multi_node_example.rb +22 -0
  22. data/examples/som/som_single_example.rb +24 -0
  23. data/lib/ai4r.rb +32 -0
  24. data/lib/ai4r/classifiers/classifier.rb +59 -0
  25. data/lib/ai4r/classifiers/hyperpipes.rb +118 -0
  26. data/lib/ai4r/classifiers/id3.rb +326 -0
  27. data/lib/ai4r/classifiers/multilayer_perceptron.rb +135 -0
  28. data/lib/ai4r/classifiers/naive_bayes.rb +259 -0
  29. data/lib/ai4r/classifiers/one_r.rb +110 -0
  30. data/lib/ai4r/classifiers/prism.rb +197 -0
  31. data/lib/ai4r/classifiers/zero_r.rb +73 -0
  32. data/lib/ai4r/clusterers/average_linkage.rb +59 -0
  33. data/lib/ai4r/clusterers/bisecting_k_means.rb +93 -0
  34. data/lib/ai4r/clusterers/centroid_linkage.rb +66 -0
  35. data/lib/ai4r/clusterers/clusterer.rb +61 -0
  36. data/lib/ai4r/clusterers/complete_linkage.rb +67 -0
  37. data/lib/ai4r/clusterers/diana.rb +139 -0
  38. data/lib/ai4r/clusterers/k_means.rb +126 -0
  39. data/lib/ai4r/clusterers/median_linkage.rb +61 -0
  40. data/lib/ai4r/clusterers/single_linkage.rb +194 -0
  41. data/lib/ai4r/clusterers/ward_linkage.rb +64 -0
  42. data/lib/ai4r/clusterers/weighted_average_linkage.rb +61 -0
  43. data/lib/ai4r/data/data_set.rb +266 -0
  44. data/lib/ai4r/data/parameterizable.rb +64 -0
  45. data/lib/ai4r/data/proximity.rb +100 -0
  46. data/lib/ai4r/data/statistics.rb +77 -0
  47. data/lib/ai4r/experiment/classifier_evaluator.rb +95 -0
  48. data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +270 -0
  49. data/lib/ai4r/neural_network/backpropagation.rb +293 -0
  50. data/lib/ai4r/neural_network/hopfield.rb +149 -0
  51. data/lib/ai4r/som/layer.rb +68 -0
  52. data/lib/ai4r/som/node.rb +96 -0
  53. data/lib/ai4r/som/som.rb +155 -0
  54. data/lib/ai4r/som/two_phase_layer.rb +90 -0
  55. data/site/forrest.properties +152 -0
  56. data/site/forrest.properties.dispatcher.properties +25 -0
  57. data/site/forrest.properties.xml +29 -0
  58. data/site/src/documentation/README.txt +7 -0
  59. data/site/src/documentation/classes/CatalogManager.properties +62 -0
  60. data/site/src/documentation/content/locationmap.xml +72 -0
  61. data/site/src/documentation/content/xdocs/downloads.html +9 -0
  62. data/site/src/documentation/content/xdocs/geneticAlgorithms.xml +294 -0
  63. data/site/src/documentation/content/xdocs/index.xml +155 -0
  64. data/site/src/documentation/content/xdocs/machineLearning.xml +131 -0
  65. data/site/src/documentation/content/xdocs/neuralNetworks.xml +270 -0
  66. data/site/src/documentation/content/xdocs/site.xml +54 -0
  67. data/site/src/documentation/content/xdocs/sourceCode.xml +43 -0
  68. data/site/src/documentation/content/xdocs/tabs.xml +35 -0
  69. data/site/src/documentation/resources/images/ai4r-logo.png +0 -0
  70. data/site/src/documentation/resources/images/c.png +0 -0
  71. data/site/src/documentation/resources/images/c_wbn.png +0 -0
  72. data/site/src/documentation/resources/images/c_wn.png +0 -0
  73. data/site/src/documentation/resources/images/ellipse-2.svg +30 -0
  74. data/site/src/documentation/resources/images/ero.gif +0 -0
  75. data/site/src/documentation/resources/images/europe2.png +0 -0
  76. data/site/src/documentation/resources/images/europe3.png +0 -0
  77. data/site/src/documentation/resources/images/fitness.png +0 -0
  78. data/site/src/documentation/resources/images/genetic_algorithms_example.png +0 -0
  79. data/site/src/documentation/resources/images/icon-a.png +0 -0
  80. data/site/src/documentation/resources/images/icon-b.png +0 -0
  81. data/site/src/documentation/resources/images/icon.png +0 -0
  82. data/site/src/documentation/resources/images/jadeferret.png +0 -0
  83. data/site/src/documentation/resources/images/my_email.png +0 -0
  84. data/site/src/documentation/resources/images/neural_network_example.png +0 -0
  85. data/site/src/documentation/resources/images/project-logo.png +0 -0
  86. data/site/src/documentation/resources/images/rubyforge.png +0 -0
  87. data/site/src/documentation/resources/images/s.png +0 -0
  88. data/site/src/documentation/resources/images/s_wbn.png +0 -0
  89. data/site/src/documentation/resources/images/s_wn.png +0 -0
  90. data/site/src/documentation/resources/images/sigmoid.png +0 -0
  91. data/site/src/documentation/resources/images/sub-dir/icon-c.png +0 -0
  92. data/site/src/documentation/resources/images/t.png +0 -0
  93. data/site/src/documentation/resources/images/t_wbn.png +0 -0
  94. data/site/src/documentation/resources/images/t_wn.png +0 -0
  95. data/site/src/documentation/resources/schema/catalog.xcat +29 -0
  96. data/site/src/documentation/resources/schema/hello-v10.dtd +51 -0
  97. data/site/src/documentation/resources/schema/symbols-project-v10.ent +26 -0
  98. data/site/src/documentation/resources/stylesheets/hello2document.xsl +33 -0
  99. data/site/src/documentation/sitemap.xmap +66 -0
  100. data/site/src/documentation/skinconf.xml +418 -0
  101. data/site/src/documentation/translations/langcode.xml +29 -0
  102. data/site/src/documentation/translations/languages_de.xml +24 -0
  103. data/site/src/documentation/translations/languages_en.xml +24 -0
  104. data/site/src/documentation/translations/languages_es.xml +22 -0
  105. data/site/src/documentation/translations/languages_fr.xml +24 -0
  106. data/site/src/documentation/translations/languages_nl.xml +24 -0
  107. data/site/src/documentation/translations/menu.xml +33 -0
  108. data/site/src/documentation/translations/menu_af.xml +33 -0
  109. data/site/src/documentation/translations/menu_de.xml +33 -0
  110. data/site/src/documentation/translations/menu_es.xml +33 -0
  111. data/site/src/documentation/translations/menu_fr.xml +33 -0
  112. data/site/src/documentation/translations/menu_it.xml +33 -0
  113. data/site/src/documentation/translations/menu_nl.xml +33 -0
  114. data/site/src/documentation/translations/menu_no.xml +33 -0
  115. data/site/src/documentation/translations/menu_ru.xml +33 -0
  116. data/site/src/documentation/translations/menu_sk.xml +33 -0
  117. data/site/src/documentation/translations/tabs.xml +22 -0
  118. data/site/src/documentation/translations/tabs_de.xml +22 -0
  119. data/site/src/documentation/translations/tabs_es.xml +22 -0
  120. data/site/src/documentation/translations/tabs_fr.xml +22 -0
  121. data/site/src/documentation/translations/tabs_nl.xml +22 -0
  122. data/test/classifiers/hyperpipes_test.rb +84 -0
  123. data/test/classifiers/id3_test.rb +208 -0
  124. data/test/classifiers/multilayer_perceptron_test.rb +79 -0
  125. data/test/classifiers/naive_bayes_test.rb +43 -0
  126. data/test/classifiers/one_r_test.rb +62 -0
  127. data/test/classifiers/prism_test.rb +85 -0
  128. data/test/classifiers/zero_r_test.rb +50 -0
  129. data/test/clusterers/average_linkage_test.rb +51 -0
  130. data/test/clusterers/bisecting_k_means_test.rb +66 -0
  131. data/test/clusterers/centroid_linkage_test.rb +53 -0
  132. data/test/clusterers/complete_linkage_test.rb +57 -0
  133. data/test/clusterers/diana_test.rb +69 -0
  134. data/test/clusterers/k_means_test.rb +100 -0
  135. data/test/clusterers/median_linkage_test.rb +53 -0
  136. data/test/clusterers/single_linkage_test.rb +122 -0
  137. data/test/clusterers/ward_linkage_test.rb +53 -0
  138. data/test/clusterers/weighted_average_linkage_test.rb +53 -0
  139. data/test/data/data_set.csv +121 -0
  140. data/test/data/data_set_test.rb +96 -0
  141. data/test/data/proximity_test.rb +81 -0
  142. data/test/data/statistics_data_set.csv +5 -0
  143. data/test/data/statistics_test.rb +65 -0
  144. data/test/experiment/classifier_evaluator_test.rb +76 -0
  145. data/test/genetic_algorithm/chromosome_test.rb +58 -0
  146. data/test/genetic_algorithm/genetic_algorithm_test.rb +81 -0
  147. data/test/neural_network/backpropagation_test.rb +69 -0
  148. data/test/neural_network/hopfield_test.rb +72 -0
  149. data/test/som/som_test.rb +97 -0
  150. metadata +238 -0
@@ -0,0 +1,64 @@
1
+ # Author:: Sergio Fierens
2
+ # License:: MPL 1.1
3
+ # Project:: ai4r
4
+ # Url:: http://ai4r.rubyforge.org/
5
+ #
6
+ # You can redistribute it and/or modify it under the terms of
7
+ # the Mozilla Public License version 1.1 as published by the
8
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
+
10
+ module Ai4r
11
+ module Data
12
module Parameterizable

  # Class-level helpers added to every class that includes Parameterizable.
  module ClassMethods

    # Describe what can be parameterized on this algorithm.
    # Returns the hash registered through parameters_info, or an empty
    # hash when nothing was registered. The format is:
    #   { :param_name => "Info on the parameter" }
    def get_parameters_info
      @_params_info_ || {}
    end

    # Register what can be parameterized on this algorithm.
    # You must provide a hash with the following format:
    #   { :param_name => "Info on the parameter" }
    # A reader and a writer are generated for every key of the hash.
    def parameters_info(params_info)
      @_params_info_ = params_info
      params_info.keys.each { |name| attr_accessor name }
    end
  end

  # Set parameter values on this algorithm instance.
  # You must provide a hash with the following format:
  #   { :param_name => parameter_value }
  # Only keys registered through parameters_info are applied; a registered
  # parameter that is absent from the hash is left untouched.
  # Returns self, so calls can be chained.
  def set_parameters(params)
    self.class.get_parameters_info.keys.each do |name|
      writer = "#{name}=".to_sym
      next unless respond_to?(writer)
      next unless params.has_key?(name)
      send(writer, params[name])
    end
    self
  end

  # Get parameter values on this algorithm instance.
  # Returns a hash with the following format:
  #   { :param_name => parameter_value }
  def get_parameters
    current = {}
    self.class.get_parameters_info.keys.each do |name|
      next unless respond_to?(name)
      current[name] = send(name)
    end
    current
  end

  # Hook run when the module is included: extends the including class
  # with ClassMethods.
  def self.included(base)
    base.extend(ClassMethods)
  end

end
62
+ end
63
+ end
64
+
@@ -0,0 +1,100 @@
1
+ # Author:: Sergio Fierens
2
+ # License:: MPL 1.1
3
+ # Project:: ai4r
4
+ # Url:: http://ai4r.rubyforge.org/
5
+ #
6
+ # You can redistribute it and/or modify it under the terms of
7
+ # the Mozilla Public License version 1.1 as published by the
8
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
+
10
+ module Ai4r
11
+ module Data
12
+
13
+ # This module provides classical distance functions
14
module Proximity

  # Squared Euclidean distance: a computationally cheaper replacement for
  # the Euclidean distance (no square root is taken).
  # Parameters a and b are vectors with continuous attributes.
  def self.squared_euclidean_distance(a, b)
    a.each_with_index.inject(0.0) do |total, (item_a, i)|
      total + (item_a - b[i])**2
    end
  end

  # Euclidean distance, or L2 norm.
  # Parameters a and b are vectors with continuous attributes.
  # Euclidean distance tends to form hyperspherical
  # clusters (Clustering, Xu and Wunsch, 2009).
  # Translations and rotations do not cause a
  # distortion in distance relation (Duda et al, 2001).
  # If attributes are measured with different units,
  # attributes with larger values and variance will
  # dominate the metric.
  def self.euclidean_distance(a, b)
    squared = squared_euclidean_distance(a, b)
    Math.sqrt(squared)
  end

  # City block, Manhattan distance, or L1 norm.
  # Parameters a and b are vectors with continuous attributes.
  def self.manhattan_distance(a, b)
    a.each_with_index.inject(0.0) do |total, (item_a, i)|
      total + (item_a - b[i]).abs
    end
  end

  # Sup distance, or L-infinity norm: the largest absolute difference
  # found between corresponding attributes.
  # Parameters a and b are vectors with continuous attributes.
  def self.sup_distance(a, b)
    a.each_with_index.inject(0.0) do |largest, (item_a, i)|
      gap = (item_a - b[i]).abs
      gap > largest ? gap : largest
    end
  end

  # The Hamming distance between two attribute vectors of equal
  # length is the number of positions at which the corresponding
  # values are different.
  # This distance function is frequently used with binary attributes,
  # though it can be used with other discrete attributes.
  def self.hamming_distance(a,b)
    a.each_index.inject(0) do |mismatches, i|
      a[i] != b[i] ? mismatches + 1 : mismatches
    end
  end

  # The "Simple matching" distance between two attribute sets is derived
  # from the number of values present on both sets.
  # If sets a and b have lengths da and db then:
  #
  #  S = 2/(da + db) * Number of values present on both sets
  #  D = 1.0/S - 1
  #
  # Some considerations:
  # * a and b must not include repeated items
  # * all attributes are treated equally
  def self.simple_matching_distance(a,b)
    matches = 0.0
    a.each do |item|
      matches += 2 if b.include?(item)
    end
    similarity = matches / (a.length + b.length)
    1.0/similarity - 1
  end

end
96
+
97
+ end
98
+
99
+ end
100
+
@@ -0,0 +1,77 @@
1
+ # Author:: Sergio Fierens
2
+ # License:: MPL 1.1
3
+ # Project:: ai4r
4
+ # Url:: http://ai4r.rubyforge.org/
5
+ #
6
+ # You can redistribute it and/or modify it under the terms of
7
+ # the Mozilla Public License version 1.1 as published by the
8
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
+
10
+ require File.dirname(__FILE__) + '/data_set'
11
+
12
+ module Ai4r
13
+ module Data
14
+
15
+ # This module provides some basic statistics functions to operate on
16
+ # data set attributes.
17
module Statistics

  # Get the sample mean of an attribute.
  # data_set must respond to get_index(attribute) and data_items.
  # Returns NaN (0.0/0) when the data set has no items.
  def self.mean(data_set, attribute)
    index = data_set.get_index(attribute)
    sum = 0.0
    data_set.data_items.each { |item| sum += item[index] }
    return sum / data_set.data_items.length
  end

  # Get the sample variance (the divisor is n-1).
  # You can provide the mean if you have it already, to speed up things;
  # it is only computed when the argument is omitted (nil).
  def self.variance(data_set, attribute, mean = nil)
    index = data_set.get_index(attribute)
    # The explicit receiver makes it clear this is the module function and
    # not the local variable of the same name.
    mean ||= self.mean(data_set, attribute)
    sum = 0.0
    data_set.data_items.each { |item| sum += (item[index]-mean)**2 }
    return sum / (data_set.data_items.length-1)
  end

  # Get the standard deviation (square root of the sample variance).
  # You can provide the variance if you have it already, to speed up things.
  def self.standard_deviation(data_set, attribute, variance = nil)
    variance ||= self.variance(data_set, attribute)
    Math.sqrt(variance)
  end

  # Get the sample mode: the most frequent value of the attribute.
  # On ties, the value that reaches the highest count first wins.
  # Returns nil when the data set has no items.
  def self.mode(data_set, attribute)
    index = data_set.get_index(attribute)
    count = Hash.new(0)
    max_count = 0
    mode = nil
    data_set.data_items.each do |data_item|
      attr_value = data_item[index]
      attr_count = (count[attr_value] += 1)
      if attr_count > max_count
        mode = attr_value
        max_count = attr_count
      end
    end
    return mode
  end

  # Get the maximum value of an attribute in the data set.
  # Returns -Infinity when the data set has no items.
  def self.max(data_set, attribute)
    index = data_set.get_index(attribute)
    item = data_set.data_items.max {|x,y| x[index] <=> y[index]}
    return (item) ? item[index] : (-1.0/0)
  end

  # Get the minimum value of an attribute in the data set.
  # Returns +Infinity when the data set has no items.
  def self.min(data_set, attribute)
    index = data_set.get_index(attribute)
    item = data_set.data_items.min {|x,y| x[index] <=> y[index]}
    return (item) ? item[index] : (1.0/0)
  end

end
76
+ end
77
+ end
@@ -0,0 +1,95 @@
1
+ require 'benchmark'
2
+ require File.dirname(__FILE__) + '/../data/data_set'
3
+
4
+
5
+ module Ai4r
6
+
7
+ module Experiment
8
+
9
+ # The ClassifierEvaluator is useful to compare different classifiers
10
+ # algorithms. The evaluator builds the Classifiers using the same data
11
+ # examples, and provides methods to evaluate their performance in parallel.
12
+ # It is a nice tool to compare and evaluate the performance of different
13
+ # algorithms, the same algorithm with different parameters, or your own new
14
+ # algorithm against the classic classifiers.
15
class ClassifierEvaluator

  attr_reader :build_times, :eval_times, :classifiers

  # Start with an empty batch of classifiers.
  def initialize
    @classifiers = []
  end

  # Add a classifier instance to the test batch.
  # Returns self, so calls can be chained (also available as <<).
  def add_classifier(classifier)
    @classifiers.push(classifier)
    self
  end

  alias :<< :add_classifier

  # Build all classifiers, using data examples found in data_set.
  # The last attribute of each item is considered as the
  # item class.
  # Building times are measured separately for each classifier, and can be
  # accessed through the build_times attribute reader.
  def build(data_set)
    @build_times = []
    @classifiers.each do |candidate|
      elapsed = Benchmark.measure { candidate.build data_set }
      @build_times.push(elapsed)
    end
    self
  end

  # You can evaluate new data, predicting its class.
  # One prediction per classifier is returned, in insertion order.
  # e.g.
  #   classifier.eval(['New York',  '<30', 'F'])
  #   => ['Y', 'Y', 'Y', 'N', 'Y', 'Y', 'N']
  # Evaluation times are measured separately for each classifier, and can be
  # accessed through the eval_times attribute reader.
  def eval(data)
    @eval_times = []
    predictions = []
    @classifiers.each do |candidate|
      elapsed = Benchmark.measure { predictions.push(candidate.eval(data)) }
      @eval_times.push(elapsed)
    end
    predictions
  end

  # Test classifiers using a data set. The last attribute of each item
  # is considered as the expected class. Data items are evaluated
  # using all classifiers: evaluation times, success rate, and quantity of
  # classification errors are returned in a data set.
  # The return data set has a row for every classifier tested, and the
  # following attributes:
  #   ["Classifier", "Testing Time", "Errors", "Success rate"]
  def test(data_set)
    rows = []
    @classifiers.each { |candidate| rows << test_classifier(candidate, data_set) }
    Ai4r::Data::DataSet.new(:data_items => rows,
      :data_labels => ["Classifier","Testing Time","Errors","Success rate"])
  end

  private

  # Run one classifier over every item of data_set and build its result row:
  #   [classifier, elapsed real time, error count, success rate]
  def test_classifier(classifier, data_set)
    total = data_set.data_items.length
    misses = 0
    elapsed = Benchmark.measure do
      data_set.data_items.each do |row|
        # Everything but the last attribute is the input; the last one is
        # the expected class.
        inputs = row[0...-1]
        expected = row.last
        misses += 1 if classifier.eval(inputs) != expected
      end
    end
    success_rate = (total - misses*1.0) / total
    [classifier, elapsed.real, misses, success_rate]
  end

end
92
+
93
+ end
94
+
95
+ end
@@ -0,0 +1,270 @@
1
+ # Author:: Sergio Fierens
2
+ # License:: MPL 1.1
3
+ # Project:: ai4r
4
+ # Url:: http://ai4r.rubyforge.org/
5
+ #
6
+ # You can redistribute it and/or modify it under the terms of
7
+ # the Mozilla Public License version 1.1 as published by the
8
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
+ module Ai4r
10
+
11
+ # The GeneticAlgorithm module implements the GeneticSearch and Chromosome
12
+ # classes. The GeneticSearch is a generic class, and can be used to solve
13
+ # any kind of problems. The GeneticSearch class performs a stochastic search
14
+ # of the solution of a given problem.
15
+ #
16
+ # The Chromosome is "problem specific". Ai4r built-in Chromosome class was
17
+ # designed to model the Travelling salesman problem. If you want to solve other
18
+ # type of problem, you will have to modify the Chromosome class, by overwriting
19
+ # its fitness, reproduce, and mutate functions, to model your specific problem.
20
+ module GeneticAlgorithm
21
+
22
+ # This class is used to automatically:
23
+ #
24
+ # 1. Choose initial population
25
+ # 2. Evaluate the fitness of each individual in the population
26
+ # 3. Repeat
27
+ # 1. Select best-ranking individuals to reproduce
28
+ # 2. Breed new generation through crossover and mutation (genetic operations) and give birth to offspring
29
+ # 3. Evaluate the individual fitnesses of the offspring
30
+ # 4. Replace worst ranked part of population with offspring
31
+ # 4. Until termination
32
+ #
33
+ # If you want to customize the algorithm, you must modify any of the following classes:
34
+ # - Chromosome
35
+ # - Population
36
class GeneticSearch

  attr_accessor :population

  # initial_population_size:: number of chromosomes seeded by
  #                           generate_initial_population
  # generations:: number of select / breed / replace iterations done by run
  def initialize(initial_population_size, generations)
    @population_size = initial_population_size
    @max_generation = generations
    @generation = 0
  end

  # 1. Choose initial population
  # 2. Evaluate the fitness of each individual in the population
  # 3. Repeat
  #    1. Select best-ranking individuals to reproduce
  #    2. Breed new generation through crossover and mutation (genetic operations) and give birth to offspring
  #    3. Evaluate the individual fitnesses of the offspring
  #    4. Replace worst ranked part of population with offspring
  # 4. Until termination
  # 5. Return the best chromosome
  def run
    generate_initial_population                    #Generate initial population
    @max_generation.times do
      selected_to_breed = selection                #Evaluates current population
      offsprings = reproduction selected_to_breed  #Generate the population for this new generation
      replace_worst_ranked offsprings
    end
    return best_chromosome
  end

  # Fill the population with @population_size chromosomes obtained from
  # Chromosome.seed.
  def generate_initial_population
    @population = []
    @population_size.times do
      population << Chromosome.seed
    end
  end

  # Select best-ranking individuals to reproduce
  #
  # Selection is the stage of a genetic algorithm in which individual
  # genomes are chosen from a population for later breeding.
  # There are several generic selection algorithms, such as
  # tournament selection and roulette wheel selection. We implemented the
  # latter.
  #
  # Steps:
  #
  # 1. The fitness function is evaluated for each individual, providing fitness values
  # 2. The population is sorted by descending fitness values.
  # 3. The fitness values are then normalized. (Highest fitness gets 1, lowest fitness gets 0). The normalized value is stored in the "normalized_fitness" attribute of the chromosomes.
  # 4. A random number R is chosen. R is between 0 and the accumulated normalized value (all the normalized fitness values added together).
  # 5. The selected individual is the first one whose accumulated normalized value (its normalized value plus the normalized values of the chromosomes prior to it) is greater than or equal to R.
  # 6. We repeat steps 4 and 5, 2/3 times the population size.
  #
  # Returns the array of selected chromosomes (empty when the population
  # is empty).
  def selection
    return [] if @population.empty?
    @population.sort! { |a, b| b.fitness <=> a.fitness}
    best_fitness = @population[0].fitness
    worst_fitness = @population.last.fitness
    # Force float arithmetic: with Integer fitness values an integer
    # division would collapse every normalized value to 0 or 1.
    fitness_range = (best_fitness - worst_fitness).to_f
    acum_fitness = 0
    if fitness_range > 0
      @population.each do |chromosome|
        chromosome.normalized_fitness = (chromosome.fitness - worst_fitness) / fitness_range
        acum_fitness += chromosome.normalized_fitness
      end
    else
      # Every individual is equally fit: give each one the same weight and
      # account for it in the accumulated total, otherwise the roulette
      # wheel would always stop on the first individual.
      @population.each { |chromosome| chromosome.normalized_fitness = 1}
      acum_fitness = @population.length
    end
    selected_to_breed = []
    ((2*@population_size)/3).times do
      selected_to_breed << select_random_individual(acum_fitness)
    end
    selected_to_breed
  end

  # We combine each pair of selected chromosomes using the method
  # Chromosome.reproduce
  #
  # The reproduction will also call the Chromosome.mutate method with
  # each member of the population. You should implement Chromosome.mutate
  # to only change (mutate) randomly. E.g. You could effectively change the
  # chromosome only if
  #     rand < ((1 - chromosome.normalized_fitness) * 0.4)
  #
  # Returns the array of offspring (one per pair of selected chromosomes).
  def reproduction(selected_to_breed)
    offsprings = []
    0.upto(selected_to_breed.length/2-1) do |i|
      offsprings << Chromosome.reproduce(selected_to_breed[2*i], selected_to_breed[2*i+1])
    end
    @population.each do |individual|
      Chromosome.mutate(individual)
    end
    return offsprings
  end

  # Replace worst ranked part of population with offspring.
  # The last offsprings.length members of the population are dropped and
  # the offspring are appended in their place.
  def replace_worst_ranked(offsprings)
    size = offsprings.length
    @population = @population[0..(-size - 1)] + offsprings
  end

  # Select the best chromosome in the population (highest fitness).
  # Returns nil when the population is empty.
  def best_chromosome
    the_best = @population[0]
    @population.each do |chromosome|
      the_best = chromosome if chromosome.fitness > the_best.fitness
    end
    return the_best
  end

  private

  # Roulette wheel spin: pick a random target between 0 and acum_fitness
  # and return the first chromosome whose accumulated normalized fitness
  # reaches it.
  def select_random_individual(acum_fitness)
    select_random_target = acum_fitness * rand
    local_acum = 0
    @population.each do |chromosome|
      local_acum += chromosome.normalized_fitness
      return chromosome if local_acum >= select_random_target
    end
    # Safety net: never leak the population array (the value of #each) to
    # the caller if no chromosome reached the target.
    @population.last
  end

end
155
+
156
+ # A Chromosome is a representation of an individual solution for a specific
157
+ # problem. You will have to redefine the Chromosome representation for each
158
+ # particular problem, along with its fitness, mutate, reproduce, and seed
159
+ # methods.
160
class Chromosome

  attr_reader :data
  attr_accessor :normalized_fitness

  # data:: the solution encoded by this chromosome. In this implementation
  #        it is a sequence of node indexes into the cost matrix.
  def initialize(data)
    @data = data
  end

  # Replace the chromosome data. The cached fitness is discarded, so that
  # the next call to fitness is computed from the new data.
  def data=(new_data)
    @data = new_data
    @fitness = nil
  end

  # The fitness method quantifies the optimality of a solution
  # (that is, a chromosome) in a genetic algorithm so that that particular
  # chromosome may be ranked against all the other chromosomes.
  #
  # Optimal chromosomes, or at least chromosomes which are more optimal,
  # are allowed to breed and mix their datasets by any of several techniques,
  # producing a new generation that will (hopefully) be even better.
  #
  # Here the fitness is the negated sum of the costs between consecutive
  # nodes of data, looked up in the matrix given to set_cost_matrix.
  # The value is cached until data is reassigned.
  def fitness
    return @fitness if @fitness
    last_token = @data[0]
    cost = 0
    @data[1..-1].each do |token|
      cost += @@costs[last_token][token]
      last_token = token
    end
    @fitness = -1 * cost
    return @fitness
  end

  # mutation method is used to maintain genetic diversity from one
  # generation of a population of chromosomes to the next. It is analogous
  # to biological mutation.
  #
  # The purpose of mutation in GAs is to allow the
  # algorithm to avoid local minima by preventing the population of
  # chromosomes from becoming too similar to each other, thus slowing or even
  # stopping evolution.
  #
  # Calling the mutate function will "probably" slightly change a chromosome
  # randomly.
  #
  # This implementation of "mutation" will (probably) reverse the
  # order of 2 consecutive random nodes
  # (e.g. from [ 0, 1, 2, 4] to [0, 2, 1, 4]) if:
  #     rand < ((1 - chromosome.normalized_fitness) * 0.3)
  #
  # Chromosomes without a normalized_fitness, or with fewer than 2 nodes,
  # are left untouched. Always returns nil.
  def self.mutate(chromosome)
    if chromosome.normalized_fitness && rand < ((1 - chromosome.normalized_fitness) * 0.3)
      data = chromosome.data
      # A swap needs two neighbours; rand(0) would also yield a Float.
      if data.length >= 2
        index = rand(data.length-1)
        data[index], data[index+1] = data[index+1], data[index]
        # Assigning through the writer drops the chromosome's cached
        # fitness, which no longer matches the mutated data.
        chromosome.data = data
      end
    end
    nil
  end

  # Reproduction method is used to combine two chromosomes (solutions) into
  # a single new chromosome. There are several ways to
  # combine two chromosomes: One-point crossover, Two-point crossover,
  # "Cut and splice", edge recombination, and more.
  #
  # The method is usually dependent on the problem domain.
  # In this case, we have implemented edge recombination, which is the
  # most used reproduction algorithm for the Travelling salesman problem.
  #
  # Starting from the first node of a, the successor of the current node is
  # taken from b if still available, else from a, else picked at random
  # among the remaining nodes. After each step the parents swap roles with
  # probability 0.4.
  def self.reproduce(a, b)
    data_size = @@costs[0].length
    available = []
    0.upto(data_size-1) { |n| available << n }
    token = a.data[0]
    spawn = [token]
    available.delete(token)
    while available.length > 0 do
      #Select next
      if token != b.data.last && available.include?(b.data[b.data.index(token)+1])
        next_token = b.data[b.data.index(token)+1]
      elsif token != a.data.last && available.include?(a.data[a.data.index(token)+1])
        next_token = a.data[a.data.index(token)+1]
      else
        next_token = available[rand(available.length)]
      end
      #Add to spawn
      token = next_token
      available.delete(token)
      spawn << next_token
      a, b = b, a if rand < 0.4
    end
    return Chromosome.new(spawn)
  end

  # Initializes an individual solution (chromosome) for the initial
  # population. Usually the chromosome is generated randomly, but you can
  # use some problem domain knowledge, to generate a
  # (probably) better initial solution.
  #
  # This implementation returns a random permutation of the node indexes
  # of the cost matrix.
  def self.seed
    data_size = @@costs[0].length
    available = []
    0.upto(data_size-1) { |n| available << n }
    seed = []
    while available.length > 0 do
      index = rand(available.length)
      seed << available.delete_at(index)
    end
    return Chromosome.new(seed)
  end

  # Set the cost matrix shared by all chromosomes: costs[i][j] is the cost
  # of going from node i to node j. It must be set before seed, reproduce
  # or fitness are used.
  def self.set_cost_matrix(costs)
    @@costs = costs
  end
end
267
+
268
+ end
269
+
270
+ end