ai4r 1.1 → 1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (140) hide show
  1. data/README.rdoc +21 -20
  2. data/examples/decision_trees/id3_example.rb +3 -2
  3. data/examples/genetic_algorithm/genetic_algorithm_example.rb +6 -6
  4. data/examples/neural_network/backpropagation_example.rb +2 -2
  5. data/lib/ai4r/classifiers/classifier_helper.rb +54 -0
  6. data/lib/ai4r/classifiers/id3.rb +356 -0
  7. data/lib/ai4r/classifiers/one_r.rb +148 -0
  8. data/lib/ai4r/classifiers/prism.rb +231 -0
  9. data/lib/ai4r/classifiers/zero_r.rb +104 -0
  10. data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +272 -0
  11. data/lib/ai4r/neural_network/backpropagation.rb +271 -0
  12. data/site/build/tmp/locationmap.xml +14 -14
  13. data/site/build/tmp/output.xmap +23 -23
  14. data/site/build/tmp/pluginlist2fetchbuild.xml +144 -144
  15. data/site/build/tmp/plugins-1.xml +0 -11
  16. data/site/build/tmp/plugins-2.xml +54 -0
  17. data/site/build/tmp/projfilters.properties +41 -41
  18. data/site/build/webapp/WEB-INF/logs/core.log +681 -788
  19. data/site/build/webapp/WEB-INF/logs/error.log +281 -248
  20. data/site/build/webapp/WEB-INF/logs/sitemap.log +1015 -0
  21. data/site/src/documentation/content/xdocs/forum.html +9 -0
  22. data/site/src/documentation/content/xdocs/geneticAlgorithms.xml +82 -68
  23. data/site/src/documentation/content/xdocs/index.xml +47 -18
  24. data/site/src/documentation/content/xdocs/machineLearning.xml +10 -9
  25. data/site/src/documentation/content/xdocs/neuralNetworks.xml +60 -36
  26. data/site/src/documentation/content/xdocs/site.xml +8 -5
  27. data/site/src/documentation/content/xdocs/svn.xml +11 -1
  28. data/site/src/documentation/resources/images/Thumbs.db +0 -0
  29. data/site/src/documentation/resources/images/ai4r-logo.png +0 -0
  30. data/site/src/documentation/resources/images/genetic_algorithms_example.png +0 -0
  31. data/site/src/documentation/resources/images/jadeferret.png +0 -0
  32. data/site/src/documentation/resources/images/neural_network_example.png +0 -0
  33. data/site/src/documentation/resources/images/sub-dir/Thumbs.db +0 -0
  34. data/site/src/documentation/skinconf.xml +18 -18
  35. data/test/classifiers/id3_test.rb +206 -0
  36. data/test/classifiers/one_r_test.rb +62 -0
  37. data/test/classifiers/prism_test.rb +83 -0
  38. data/test/classifiers/zero_r_test.rb +48 -0
  39. data/test/genetic_algorithm/chromosome_test.rb +41 -38
  40. data/test/genetic_algorithm/genetic_algorithm_test.rb +64 -61
  41. data/test/neural_network/backpropagation_test.rb +20 -18
  42. metadata +109 -199
  43. data/lib/decision_tree/id3.rb +0 -354
  44. data/lib/genetic_algorithm/genetic_algorithm.rb +0 -268
  45. data/lib/neural_network/backpropagation.rb +0 -264
  46. data/site/build/site/en/broken-links.xml +0 -2
  47. data/site/build/site/en/downloads.html +0 -187
  48. data/site/build/site/en/downloads.pdf +0 -151
  49. data/site/build/site/en/geneticAlgorithms.html +0 -564
  50. data/site/build/site/en/geneticAlgorithms.pdf +0 -911
  51. data/site/build/site/en/images/ai4r-logo.png +0 -0
  52. data/site/build/site/en/images/built-with-forrest-button.png +0 -0
  53. data/site/build/site/en/images/c.png +0 -0
  54. data/site/build/site/en/images/c_wbn.png +0 -0
  55. data/site/build/site/en/images/c_wn.png +0 -0
  56. data/site/build/site/en/images/ero.gif +0 -0
  57. data/site/build/site/en/images/europe2.png +0 -0
  58. data/site/build/site/en/images/europe3.png +0 -0
  59. data/site/build/site/en/images/fitness.png +0 -0
  60. data/site/build/site/en/images/instruction_arrow.png +0 -0
  61. data/site/build/site/en/images/my_email.png +0 -0
  62. data/site/build/site/en/images/rubyforge.png +0 -0
  63. data/site/build/site/en/images/s.png +0 -0
  64. data/site/build/site/en/images/s_wbn.png +0 -0
  65. data/site/build/site/en/images/s_wn.png +0 -0
  66. data/site/build/site/en/images/sigmoid.png +0 -0
  67. data/site/build/site/en/images/t.png +0 -0
  68. data/site/build/site/en/images/t_wbn.png +0 -0
  69. data/site/build/site/en/images/t_wn.png +0 -0
  70. data/site/build/site/en/index.html +0 -258
  71. data/site/build/site/en/index.pdf +0 -306
  72. data/site/build/site/en/linkmap.html +0 -231
  73. data/site/build/site/en/linkmap.pdf +0 -94
  74. data/site/build/site/en/locationmap.xml +0 -72
  75. data/site/build/site/en/machineLearning.html +0 -325
  76. data/site/build/site/en/machineLearning.pdf +0 -337
  77. data/site/build/site/en/neuralNetworks.html +0 -446
  78. data/site/build/site/en/neuralNetworks.pdf +0 -604
  79. data/site/build/site/en/skin/CommonMessages_de.xml +0 -23
  80. data/site/build/site/en/skin/CommonMessages_en_US.xml +0 -23
  81. data/site/build/site/en/skin/CommonMessages_es.xml +0 -23
  82. data/site/build/site/en/skin/CommonMessages_fr.xml +0 -23
  83. data/site/build/site/en/skin/basic.css +0 -166
  84. data/site/build/site/en/skin/breadcrumbs-optimized.js +0 -90
  85. data/site/build/site/en/skin/breadcrumbs.js +0 -237
  86. data/site/build/site/en/skin/fontsize.js +0 -166
  87. data/site/build/site/en/skin/getBlank.js +0 -40
  88. data/site/build/site/en/skin/getMenu.js +0 -45
  89. data/site/build/site/en/skin/images/README.txt +0 -1
  90. data/site/build/site/en/skin/images/add.jpg +0 -0
  91. data/site/build/site/en/skin/images/built-with-forrest-button.png +0 -0
  92. data/site/build/site/en/skin/images/chapter.gif +0 -0
  93. data/site/build/site/en/skin/images/chapter_open.gif +0 -0
  94. data/site/build/site/en/skin/images/current.gif +0 -0
  95. data/site/build/site/en/skin/images/error.png +0 -0
  96. data/site/build/site/en/skin/images/external-link.gif +0 -0
  97. data/site/build/site/en/skin/images/fix.jpg +0 -0
  98. data/site/build/site/en/skin/images/forrest-credit-logo.png +0 -0
  99. data/site/build/site/en/skin/images/hack.jpg +0 -0
  100. data/site/build/site/en/skin/images/header_white_line.gif +0 -0
  101. data/site/build/site/en/skin/images/info.png +0 -0
  102. data/site/build/site/en/skin/images/instruction_arrow.png +0 -0
  103. data/site/build/site/en/skin/images/label.gif +0 -0
  104. data/site/build/site/en/skin/images/page.gif +0 -0
  105. data/site/build/site/en/skin/images/pdfdoc.gif +0 -0
  106. data/site/build/site/en/skin/images/poddoc.png +0 -0
  107. data/site/build/site/en/skin/images/printer.gif +0 -0
  108. data/site/build/site/en/skin/images/rc-b-l-15-1body-2menu-3menu.png +0 -0
  109. data/site/build/site/en/skin/images/rc-b-r-15-1body-2menu-3menu.png +0 -0
  110. data/site/build/site/en/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  111. data/site/build/site/en/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png +0 -0
  112. data/site/build/site/en/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png +0 -0
  113. data/site/build/site/en/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  114. data/site/build/site/en/skin/images/rc-t-r-15-1body-2menu-3menu.png +0 -0
  115. data/site/build/site/en/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png +0 -0
  116. data/site/build/site/en/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png +0 -0
  117. data/site/build/site/en/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png +0 -0
  118. data/site/build/site/en/skin/images/remove.jpg +0 -0
  119. data/site/build/site/en/skin/images/rss.png +0 -0
  120. data/site/build/site/en/skin/images/spacer.gif +0 -0
  121. data/site/build/site/en/skin/images/success.png +0 -0
  122. data/site/build/site/en/skin/images/txtdoc.png +0 -0
  123. data/site/build/site/en/skin/images/update.jpg +0 -0
  124. data/site/build/site/en/skin/images/valid-html401.png +0 -0
  125. data/site/build/site/en/skin/images/vcss.png +0 -0
  126. data/site/build/site/en/skin/images/warning.png +0 -0
  127. data/site/build/site/en/skin/images/xmldoc.gif +0 -0
  128. data/site/build/site/en/skin/menu.js +0 -48
  129. data/site/build/site/en/skin/note.txt +0 -50
  130. data/site/build/site/en/skin/print.css +0 -54
  131. data/site/build/site/en/skin/profile.css +0 -163
  132. data/site/build/site/en/skin/prototype.js +0 -1257
  133. data/site/build/site/en/skin/screen.css +0 -587
  134. data/site/build/site/en/svn.html +0 -223
  135. data/site/build/site/en/svn.pdf +0 -239
  136. data/site/build/site/en/wholesite.pdf +0 -1686
  137. data/site/build/tmp/brokenlinks.xml +0 -2
  138. data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.data +0 -0
  139. data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.index +0 -0
  140. data/test/decision_tree/id3_test.rb +0 -209
@@ -1,354 +0,0 @@
1
-
2
- # Decision tree learning, used in data mining and machine learning,
3
- # uses a decision tree as a predictive model which maps observations about
4
- # an item to conclusions about the item's target value.
5
- #
6
- # In this module you will find an implementation of the ID3 algorithm (Quinlan)
7
- #
8
- # * http://en.wikipedia.org/wiki/Decision_tree
9
- # * http://en.wikipedia.org/wiki/ID3_algorithm
10
- #
11
- # Author:: Sergio Fierens
12
- # License:: MPL 1.1
13
- # Project:: ai4r
14
- # Url:: http://ai4r.rubyforge.org/
15
- #
16
- # You can redistribute it and/or modify it under the terms of
17
- # the Mozilla Public License version 1.1 as published by the
18
- # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
19
-
20
- module DecisionTree
21
-
22
- # = Introduction
23
- # This is an implementation of the ID3 algorithm (Quinlan)
24
- # Given a set of preclassified examples, it builds a top-down
25
- # induction of decision tree, biased by the information gain and
26
- # entropy measure.
27
- #
28
- # = How to use it
29
- #
30
- # DATA_LABELS = [ 'city', 'age_range', 'gender', 'marketing_target' ]
31
- #
32
- # DATA_SET = [ ['New York', '<30', 'M', 'Y'],
33
- # ['Chicago', '<30', 'M', 'Y'],
34
- # ['Chicago', '<30', 'F', 'Y'],
35
- # ['New York', '<30', 'M', 'Y'],
36
- # ['New York', '<30', 'M', 'Y'],
37
- # ['Chicago', '[30-50)', 'M', 'Y'],
38
- # ['New York', '[30-50)', 'F', 'N'],
39
- # ['Chicago', '[30-50)', 'F', 'Y'],
40
- # ['New York', '[30-50)', 'F', 'N'],
41
- # ['Chicago', '[50-80]', 'M', 'N'],
42
- # ['New York', '[50-80]', 'F', 'N'],
43
- # ['New York', '[50-80]', 'M', 'N'],
44
- # ['Chicago', '[50-80]', 'M', 'N'],
45
- # ['New York', '[50-80]', 'F', 'N'],
46
- # ['Chicago', '>80', 'F', 'Y']
47
- # ]
48
- #
49
- # id3 = DecisionTree::ID3.new(DATA_SET, DATA_LABELS)
50
- #
51
- # id3.to_s
52
- # # => if age_range=='<30' then marketing_target='Y'
53
- # elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
54
- # elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
55
- # elsif age_range=='[50-80]' then marketing_target='N'
56
- # elsif age_range=='>80' then marketing_target='Y'
57
- # else raise 'There was not enough information during training to do a proper induction for this data element' end
58
- #
59
- # id3.eval(['New York', '<30', 'M'])
60
- # # => 'Y'
61
- #
62
- # = A better way to load the data
63
- #
64
- # In real life you will use many more training examples, with more
65
- # attributes. Consider moving your data to an external CSV (comma-separated
66
- # values) file.
67
- #
68
- # data_set = []
69
- # CSV::Reader.parse(File.open("#{File.dirname(__FILE__)}/data_set.csv", 'r')) do |row|
70
- # data_set << row
71
- # end
72
- # data_labels = data_set.shift
73
- #
74
- # id3 = DecisionTree::ID3.new(data_set, data_labels)
75
- #
76
- # = A nice tip for data evaluation
77
- #
78
- # id3 = DecisionTree::ID3.new(DATA_SET, DATA_LABELS)
79
- # age_range = '<30'
80
- # marketing_target = nil
81
- # eval id3.to_s
82
- # puts marketing_target
83
- # # => 'Y'
84
- # = More about ID3 and decision trees
85
- #
86
- # * http://en.wikipedia.org/wiki/Decision_tree
87
- # * http://en.wikipedia.org/wiki/ID3_algorithm
88
- #
89
- # = About the project
90
- # Author:: Sergio Fierens
91
- # License:: MPL 1.1
92
-
93
- class ID3
94
- attr_reader :data_labels
95
- # Create a new decision tree. If your data is classified with N attributes
96
- # and M examples, then your data examples must have the following format:
97
- #
98
- # [ [ATT1_VAL1, ATT2_VAL1, ATT3_VAL1, ... , ATTN_VAL1, CATEGORY_VAL1],
99
- # [ATT1_VAL2, ATT2_VAL2, ATT3_VAL2, ... , ATTN_VAL2, CATEGORY_VAL2],
100
- # ...
101
- # [ATT1_VALM, ATT2_VALM, ATT3_VALM, ... , ATTN_VALM, CATEGORY_VALM],
102
- # ]
103
- #
104
- # e.g.
105
- # [ ['New York', '<30', 'M', 'Y'],
106
- # ['Chicago', '<30', 'M', 'Y'],
107
- # ['Chicago', '<30', 'F', 'Y'],
108
- # ['New York', '<30', 'M', 'Y'],
109
- # ['New York', '<30', 'M', 'Y'],
110
- # ['Chicago', '[30-50)', 'M', 'Y'],
111
- # ['New York', '[30-50)', 'F', 'N'],
112
- # ['Chicago', '[30-50)', 'F', 'Y'],
113
- # ['New York', '[30-50)', 'F', 'N'],
114
- # ['Chicago', '[50-80]', 'M', 'N'],
115
- # ['New York', '[50-80]', 'F', 'N'],
116
- # ['New York', '[50-80]', 'M', 'N'],
117
- # ['Chicago', '[50-80]', 'M', 'N'],
118
- # ['New York', '[50-80]', 'F', 'N'],
119
- # ['Chicago', '>80', 'F', 'Y']
120
- # ]
121
- #
122
- # Data labels must have the following format:
123
- # [ 'city', 'age_range', 'gender', 'marketing_target' ]
124
- #
125
- # If you do not provide labels for your data, the following labels will
126
- # be created by default:
127
- # [ 'ATTRIBUTE_1', 'ATTRIBUTE_2', 'ATTRIBUTE_3', 'CATEGORY' ]
128
- #
129
- def initialize(data_examples, data_labels=nil)
130
- raise "Examples data set must not be empty." if !data_examples || data_examples.empty?
131
- if !data_labels
132
- data_labels = []
133
- data_examples[0][0..-2].each_index do |i|
134
- data_labels[i] = "ATTRIBUTE_#{i+1}"
135
- end
136
- data_labels[data_labels.length]="CATEGORY"
137
- end
138
- @data_labels = data_labels
139
- preprocess_data(data_examples)
140
- end
141
-
142
- # You can evaluate new data, predicting its category.
143
- # e.g.
144
- # id3.eval(['New York', '<30', 'F']) # => 'Y'
145
- def eval(data)
146
- @tree.value(data)
147
- end
148
-
149
- # This method returns the generated rules in ruby code.
150
- # e.g.
151
- #
152
- # id3.to_s
153
- # # => if age_range=='<30' then marketing_target='Y'
154
- # elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
155
- # elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
156
- # elsif age_range=='[50-80]' then marketing_target='N'
157
- # elsif age_range=='>80' then marketing_target='Y'
158
- # else raise 'There was not enough information during training to do a proper induction for this data element' end
159
- #
160
- # It is a nice way to inspect induction results, and also to execute them:
161
- # age_range = '<30'
162
- # marketing_target = nil
163
- # eval id3.to_s
164
- # puts marketing_target
165
- # # => 'Y'
166
- def to_s
167
- rules = @tree.get_rules
168
- rules = rules.collect do |rule|
169
- "#{rule[0..-2].join(' and ')} then #{rule.last}"
170
- end
171
- return "if #{rules.join("\nelsif ")}\nelse raise 'There was not enough information during training to do a proper induction for this data element' end"
172
- end
173
-
174
- private
175
- def preprocess_data(data_examples)
176
- @tree = build_node(data_examples)
177
- end
178
-
179
- private
180
- def build_node(data_examples, flag_att = [])
181
- return ErrorNode.new if data_examples.length == 0
182
- domain = domain(data_examples)
183
- return CategoryNode.new(@data_labels.last, domain.last[0]) if domain.last.length == 1
184
- min_entropy_index = min_entropy_index(data_examples, domain, flag_att)
185
- flag_att << min_entropy_index
186
- split_data_examples = split_data_examples(data_examples, domain, min_entropy_index)
187
- return CategoryNode.new(@data_labels.last, most_freq(data_examples, domain)) if split_data_examples.length == 1
188
- nodes = split_data_examples.collect do |partial_data_examples|
189
- build_node(partial_data_examples, flag_att)
190
- end
191
- return EvaluationNode.new(@data_labels, min_entropy_index, domain[min_entropy_index], nodes)
192
- end
193
-
194
- private
195
- def self.sum(values)
196
- values.inject( 0 ) { |sum,x| sum+x }
197
- end
198
-
199
- private
200
- def self.log2(z)
201
- return 0.0 if z == 0
202
- Math.log(z)/LOG2
203
- end
204
-
205
- private
206
- def most_freq(examples, domain)
207
- freqs = []
208
- domain.last.length.times { freqs << 0}
209
- examples.each do |example|
210
- cat_index = domain.last.index(example.last)
211
- freq = freqs[cat_index] + 1
212
- freqs[cat_index] = freq
213
- end
214
- max_freq = freqs.max
215
- max_freq_index = freqs.index(max_freq)
216
- domain.last[max_freq_index]
217
- end
218
-
219
- private
220
- def split_data_examples(data_examples, domain, att_index)
221
- data_examples_array = []
222
- att_value_examples = {}
223
- data_examples.each do |example|
224
- example_set = att_value_examples[example[att_index]]
225
- example_set = [] if !example_set
226
- example_set << example
227
- att_value_examples.store(example[att_index], example_set)
228
- end
229
- att_value_examples.each_pair do |att_value, example_set|
230
- att_value_index = domain[att_index].index(att_value)
231
- data_examples_array[att_value_index] = example_set
232
- end
233
- return data_examples_array
234
- end
235
-
236
- private
237
- def min_entropy_index(data_examples, domain, flag_att=[])
238
- min_entropy = nil
239
- min_index = 0
240
- domain[0..-2].each_index do |index|
241
- freq_grid = freq_grid(index, data_examples, domain)
242
- entropy = entropy(freq_grid, data_examples.length)
243
- if (!min_entropy || entropy < min_entropy) && !flag_att.include?(index)
244
- min_entropy = entropy
245
- min_index = index
246
- end
247
- end
248
- return min_index
249
- end
250
-
251
- private
252
- def domain(data_examples)
253
- domain = []
254
- @data_labels.length.times { domain << [] }
255
- data_examples.each do |data|
256
- data.each_index do |i|
257
- domain[i] << data[i] if i<domain.length && !domain[i].include?(data[i])
258
- end
259
- end
260
- return domain
261
- end
262
-
263
- private
264
- def freq_grid(att_index, data_examples, domain)
265
- #Initialize empty grid
266
- grid_element = []
267
- domain.last.length.times { grid_element << 0}
268
- grid = []
269
- domain[att_index].length.times { grid << grid_element.clone }
270
- #Fill grid with frequencies
271
- data_examples.each do |example|
272
- att_val = example[att_index]
273
- att_val_index = domain[att_index].index(att_val)
274
- category = example.last
275
- category_index = domain.last.index(category)
276
- freq = grid[att_val_index][category_index] + 1
277
- grid[att_val_index][category_index] = freq
278
- end
279
- return grid
280
- end
281
-
282
- private
283
- def entropy(freq_grid, total_examples)
284
- #Calc entropy of each element
285
- entropy = 0
286
- freq_grid.each do |att_freq|
287
- att_total_freq = ID3.sum(att_freq)
288
- partial_entropy = 0
289
- if att_total_freq != 0
290
- att_freq.each do |freq|
291
- prop = freq.to_f/att_total_freq
292
- partial_entropy += (-1*prop*ID3.log2(prop))
293
- end
294
- end
295
- entropy += (att_total_freq.to_f/total_examples) * partial_entropy
296
- end
297
- return entropy
298
- end
299
-
300
- private
301
- LOG2 = Math.log(2)
302
- end
303
-
304
- class EvaluationNode
305
- attr_reader :index, :values, :nodes
306
- def initialize(data_labels, index, values, nodes)
307
- @index = index
308
- @values = values
309
- @nodes = nodes
310
- @data_labels = data_labels
311
- end
312
- def value(data)
313
- value = data[@index]
314
- return rule_not_found if !@values.include?(value)
315
- return nodes[@values.index(value)].value(data)
316
- end
317
- def get_rules
318
- rule_set = []
319
- @nodes.each_index do |child_node_index|
320
- my_rule = "#{@data_labels[@index]}=='#{@values[child_node_index]}'"
321
- child_node = @nodes[child_node_index]
322
- child_node_rules = child_node.get_rules
323
- child_node_rules.each do |child_rule|
324
- child_rule.unshift(my_rule)
325
- end
326
- rule_set += child_node_rules
327
- end
328
- return rule_set
329
- end
330
- end
331
-
332
- class CategoryNode
333
- def initialize(label, value)
334
- @label = label
335
- @value = value
336
- end
337
- def value(data)
338
- return @value
339
- end
340
- def get_rules
341
- return [["#{@label}='#{@value}'"]]
342
- end
343
- end
344
-
345
- class ErrorNode
346
- def value(data)
347
- raise "There was not enough information during training to do a proper induction for this data element."
348
- end
349
- def get_rules
350
- return []
351
- end
352
- end
353
-
354
- end
@@ -1,268 +0,0 @@
1
- #
2
- # The GeneticAlgorithm module implements the GeneticSearch and Chromosome
3
- # classes. The GeneticSearch is a generic class, and can be used to solve
4
- # any kind of problem. The GeneticSearch class performs a stochastic search
5
- # of the solution of a given problem.
6
- #
7
- # The Chromosome is "problem specific". Ai4r's built-in Chromosome class was
8
- # designed to model the Travelling salesman problem. If you want to solve another
9
- # type of problem, you will have to modify the Chromosome class, by overriding
10
- # its fitness, reproduce, and mutate functions, to model your specific problem.
11
- #
12
- # Author:: Sergio Fierens
13
- # License:: MPL 1.1
14
- # Project:: ai4r
15
- # Url:: http://ai4r.rubyforge.org/
16
- #
17
- # You can redistribute it and/or modify it under the terms of
18
- # the Mozilla Public License version 1.1 as published by the
19
- # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
20
-
21
- module GeneticAlgorithm
22
-
23
- # This class is used to automatically:
24
- #
25
- # 1. Choose initial population
26
- # 2. Evaluate the fitness of each individual in the population
27
- # 3. Repeat
28
- # 1. Select best-ranking individuals to reproduce
29
- # 2. Breed new generation through crossover and mutation (genetic operations) and give birth to offspring
30
- # 3. Evaluate the individual fitnesses of the offspring
31
- # 4. Replace worst ranked part of population with offspring
32
- # 4. Until termination
33
- #
34
- # If you want to customize the algorithm, you must modify any of the following classes:
35
- # - Chromosome
36
- # - Population
37
- class GeneticSearch
38
-
39
- attr_accessor :population
40
-
41
-
42
- def initialize(initial_population_size, generations)
43
- @population_size = initial_population_size
44
- @max_generation = generations
45
- @generation = 0
46
- end
47
-
48
- # 1. Choose initial population
49
- # 2. Evaluate the fitness of each individual in the population
50
- # 3. Repeat
51
- # 1. Select best-ranking individuals to reproduce
52
- # 2. Breed new generation through crossover and mutation (genetic operations) and give birth to offspring
53
- # 3. Evaluate the individual fitnesses of the offspring
54
- # 4. Replace worst ranked part of population with offspring
55
- # 4. Until termination
56
- # 5. Return the best chromosome
57
- def run
58
- generate_initial_population #Generate initial population
59
- @max_generation.times do
60
- selected_to_breed = selection #Evaluates current population
61
- offsprings = reproduction selected_to_breed #Generate the population for this new generation
62
- replace_worst_ranked offsprings
63
- end
64
- return best_chromosome
65
- end
66
-
67
-
68
- def generate_initial_population
69
- @population = []
70
- @population_size.times do
71
- population << Chromosome.seed
72
- end
73
- end
74
-
75
- # Select best-ranking individuals to reproduce
76
- #
77
- # Selection is the stage of a genetic algorithm in which individual
78
- # genomes are chosen from a population for later breeding.
79
- # There are several generic selection algorithms, such as
80
- # tournament selection and roulette wheel selection. We implemented the
81
- # latter.
82
- #
83
- # Steps:
84
- #
85
- # 1. The fitness function is evaluated for each individual, providing fitness values
86
- # 2. The population is sorted by descending fitness values.
87
- # 3. The fitness values are then normalized. (Highest fitness gets 1, lowest fitness gets 0). The normalized value is stored in the "normalized_fitness" attribute of the chromosomes.
88
- # 4. A random number R is chosen. R is between 0 and the accumulated normalized value (all the normalized fitness values added together).
89
- # 5. The selected individual is the first one whose accumulated normalized value (its normalized value plus the normalized values of the chromosomes prior to it) is greater than R.
90
- # 6. We repeat steps 4 and 5, 2/3 times the population size.
91
- def selection
92
- @population.sort! { |a, b| b.fitness <=> a.fitness}
93
- best_fitness = @population[0].fitness
94
- worst_fitness = @population.last.fitness
95
- acum_fitness = 0
96
- if best_fitness-worst_fitness > 0
97
- @population.each do |chromosome|
98
- chromosome.normalized_fitness = (chromosome.fitness - worst_fitness)/(best_fitness-worst_fitness)
99
- acum_fitness += chromosome.normalized_fitness
100
- end
101
- else
102
- @population.each { |chromosome| chromosome.normalized_fitness = 1}
103
- end
104
- selected_to_breed = []
105
- ((2*@population_size)/3).times do
106
- selected_to_breed << select_random_individual(acum_fitness)
107
- end
108
- selected_to_breed
109
- end
110
-
111
- # We combine each pair of selected chromosome using the method
112
- # Chromosome.reproduce
113
- #
114
- # The reproduction will also call the Chromosome.mutate method with
115
- # each member of the population. You should implement Chromosome.mutate
116
- # to only change (mutate) randomly. E.g. You could effectively change the
117
- # chromosome only if
118
- # rand < ((1 - chromosome.normalized_fitness) * 0.4)
119
- def reproduction(selected_to_breed)
120
- offsprings = []
121
- 0.upto(selected_to_breed.length/2-1) do |i|
122
- offsprings << Chromosome.reproduce(selected_to_breed[2*i], selected_to_breed[2*i+1])
123
- end
124
- @population.each do |individual|
125
- Chromosome.mutate(individual)
126
- end
127
- return offsprings
128
- end
129
-
130
- # Replace worst ranked part of population with offspring
131
- def replace_worst_ranked(offsprings)
132
- size = offsprings.length
133
- @population = @population [0..((-1*size)-1)] + offsprings
134
- end
135
-
136
- # Select the best chromosome in the population
137
- def best_chromosome
138
- the_best = @population[0]
139
- @population.each do |chromosome|
140
- the_best = chromosome if chromosome.fitness > the_best.fitness
141
- end
142
- return the_best
143
- end
144
-
145
- private
146
- def select_random_individual(acum_fitness)
147
- select_random_target = acum_fitness * rand
148
- local_acum = 0
149
- @population.each do |chromosome|
150
- local_acum += chromosome.normalized_fitness
151
- return chromosome if local_acum >= select_random_target
152
- end
153
- end
154
-
155
- end
156
-
157
- # A Chromosome is a representation of an individual solution for a specific
158
- # problem. You will have to redefine your Chromosome representation for each
159
- # particular problem, along with its fitness, mutate, reproduce, and seed
160
- # functions.
161
- class Chromosome
162
-
163
- attr_accessor :data
164
- attr_accessor :normalized_fitness
165
-
166
- def initialize(data)
167
- @data = data
168
- end
169
-
170
- # The fitness function quantifies the optimality of a solution
171
- # (that is, a chromosome) in a genetic algorithm so that that particular
172
- # chromosome may be ranked against all the other chromosomes.
173
- #
174
- # Optimal chromosomes, or at least chromosomes which are more optimal,
175
- # are allowed to breed and mix their datasets by any of several techniques,
176
- # producing a new generation that will (hopefully) be even better.
177
- def fitness
178
- return @fitness if @fitness
179
- last_token = @data[0]
180
- cost = 0
181
- @data[1..-1].each do |token|
182
- cost += @@costs[last_token][token]
183
- last_token = token
184
- end
185
- @fitness = -1 * cost
186
- return @fitness
187
- end
188
-
189
- # mutation is a function used to maintain genetic diversity from one
190
- # generation of a population of chromosomes to the next. It is analogous
191
- # to biological mutation.
192
- #
193
- # The purpose of mutation in GAs is to allow the
194
- # algorithm to avoid local minima by preventing the population of
195
- # chromosomes from becoming too similar to each other, thus slowing or even
196
- # stopping evolution.
197
- #
198
- # Calling the mutate function will "probably" slightly change a chromosome
199
- # randomly.
200
- #
201
- # This implementation of "mutation" will (probably) reverse the
202
- # order of 2 consecutive random nodes
203
- # (e.g. from [ 0, 1, 2, 4] to [0, 2, 1, 4]) if:
204
- # rand < ((1 - chromosome.normalized_fitness) * 0.3)
205
- def self.mutate(chromosome)
206
- if chromosome.normalized_fitness && rand < ((1 - chromosome.normalized_fitness) * 0.3)
207
- data = chromosome.data
208
- index = rand(data.length-1)
209
- data[index], data[index+1] = data[index+1], data[index]
210
- chromosome.data = data
211
- @fitness = nil
212
- end
213
- end
214
-
215
- # Reproduction is used to vary the programming of a chromosome or
216
- # chromosomes from one generation to the next. There are several ways to
217
- # combine two chromosomes: One-point crossover, Two-point crossover,
218
- # "Cut and splice", edge recombination, and more.
219
- #
220
- # The method is usually dependent on the problem domain.
221
- # In this case, we have implemented edge recombination, which is the
222
- # most used reproduction algorithm for the Travelling salesman problem.
223
- def self.reproduce(a, b)
224
- data_size = @@costs[0].length
225
- available = []
226
- 0.upto(data_size-1) { |n| available << n }
227
- token = a.data[0]
228
- spawn = [token]
229
- available.delete(token)
230
- while available.length > 0 do
231
- #Select next
232
- if token != b.data.last && available.include?(b.data[b.data.index(token)+1])
233
- next_token = b.data[b.data.index(token)+1]
234
- elsif token != a.data.last && available.include?(a.data[a.data.index(token)+1])
235
- next_token = a.data[a.data.index(token)+1]
236
- else
237
- next_token = available[rand(available.length)]
238
- end
239
- #Add to spawn
240
- token = next_token
241
- available.delete(token)
242
- spawn << next_token
243
- a, b = b, a if rand < 0.4
244
- end
245
- return Chromosome.new(spawn)
246
- end
247
-
248
- # Initializes an individual solution (chromosome) for the initial
249
- # population. Usually the chromosome is generated randomly, but you can
250
- # use some problem domain knowledge, to generate better initial solutions.
251
- def self.seed
252
- data_size = @@costs[0].length
253
- available = []
254
- 0.upto(data_size-1) { |n| available << n }
255
- seed = []
256
- while available.length > 0 do
257
- index = rand(available.length)
258
- seed << available.delete_at(index)
259
- end
260
- return Chromosome.new(seed)
261
- end
262
-
263
- def self.set_cost_matrix(costs)
264
- @@costs = costs
265
- end
266
- end
267
-
268
- end